diff --git a/.gitignore b/.gitignore index 9a1c8bb..c8785e9 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,3 @@ tests/plots/* tests/temp*.py data/populations/* *.mmap -tests/monte_carlo/* diff --git a/tests/monte_carlo/goodness_of_fit.py b/tests/monte_carlo/goodness_of_fit.py deleted file mode 100644 index a54ad03..0000000 --- a/tests/monte_carlo/goodness_of_fit.py +++ /dev/null @@ -1,294 +0,0 @@ -from weighted_quantiles import median -from scipy.stats import ks_2samp -import numpy as np -import os -import matplotlib.pyplot as plt - -from frbpoppy import unpickle, TNS, poisson_interval, pprint -from tests.rates.alpha_real import EXPECTED -from tests.convenience import plot_aa_style, rel_path -from simulations import SimulationOverview, POP_SIZE - -NORM_SURV = 'parkes-htru' - - -class GoodnessOfFit: - - def __init__(self): - self.run_pars = {1: ['alpha', 'si', 'li'], - 2: ['li', 'lum_min', 'lum_max'], - 3: ['w_mean', 'w_std'], - 4: ['dm_igm_slope', 'dm_host']} - self.norm_surv = NORM_SURV - self.so = SimulationOverview() - self.tns = self.get_tns() - - def get_tns(self): - # Only get one-offs - return TNS(repeaters=False, mute=True, update=False).df - - def dm(self, pop, survey_name): - """Calculate GoodnessOfFit for DM distributions.""" - mask = ((self.tns.survey == survey_name) & (self.tns.dm <= 950)) - try: - ks_dm = ks_2samp(pop.frbs.dm, self.tns[mask].dm)[1] - except ValueError: - ks_dm = np.nan - return ks_dm - - def snr(self, pop, survey_name): - mask = ((self.tns.survey == survey_name) & (self.tns.dm <= 950)) - try: - ks_snr = ks_2samp(pop.frbs.snr, self.tns[mask].snr)[1] - except ValueError: - ks_snr = np.nan - return ks_snr - - def rate(self, pop, survey_name, norm_uuid, run, errs=False): - # Add rate details - sr = pop.source_rate - surv_sim_rate = sr.det / sr.days - - # Perhaps use at some stage - if errs: - p_int = poisson_interval(sr.det, sigma=1) - surv_sim_rate_errs = [p/sr.days for p in p_int] - - # Determine ratio of detection rates - if survey_name in EXPECTED: - n_frbs, n_days = EXPECTED[survey_name] - else: - n_frbs, n_days = [np.nan, np.nan] - surv_real_rate = n_frbs/n_days - - # Get normalisation properties - norm_real_n_frbs, norm_real_n_days = EXPECTED[self.norm_surv] - norm_pop = unpickle(f'mc/run_{run}/{norm_uuid}') - norm_sim_n_frbs = norm_pop.source_rate.det - norm_sim_n_days = norm_pop.source_rate.days - norm_sim_rate = norm_sim_n_frbs / norm_sim_n_days - norm_real_rate = norm_real_n_frbs / norm_real_n_days - - if norm_sim_rate == 0: - norm_sim_rate = POP_SIZE / norm_sim_n_days - - sim_ratio = surv_sim_rate / norm_sim_rate - real_ratio = surv_real_rate / norm_real_rate - - diff = np.abs(sim_ratio - real_ratio) - if diff == 0: - rate_diff = 1e-3 - else: - rate_diff = 1 / diff - - return rate_diff, pop.n_sources() - - def calc_gofs(self, run): - - # For each requested run - self.so = SimulationOverview() - par_set = self.so.df[self.so.df.run == run].par_set.iloc[0] - pprint(f'Calculating goodness of fit for run {run}, par set {par_set}') - pars = self.run_pars[par_set] - values = [] - - # Loop through all combination of parameters - for values, group in self.so.df[self.so.df.run == run].groupby(pars): - pprint(f' - {list(zip(pars, values))}') - # Calculate goodness of fit values for each simulation - for row_ix, row in group.iterrows(): - survey_name = row.survey - uuid = row.uuid - pop = unpickle(f'mc/run_{run}/{uuid}') - - # Apply a DM cutoff - mask = (pop.frbs.dm <= 950) - pop.frbs.apply(mask) - pop.source_rate.det = pop.n_sources() * pop.source_rate.f_area - - dm_gof = self.dm(pop, survey_name) - snr_gof = self.snr(pop, survey_name) - self.so.df.at[row_ix, 'dm_gof'] = dm_gof - self.so.df.at[row_ix, 'snr_gof'] = snr_gof - - if pop.n_sources() == 0: - self.so.df.at[row_ix, 'weight'] = 0 - self.so.df.at[row_ix, 'n_det'] = pop.n_sources() - pprint(f' - No sources in {survey_name}') - continue - - # Find corresponding rate normalisation population uuid - norm_mask = dict(zip(pars, values)) - norm_mask['survey'] = self.norm_surv - norm_mask['run'] = run - k = norm_mask.keys() - v = norm_mask.values() - norm_uuid = group.loc[group[k].isin(v).all(axis=1), :].uuid - norm_uuid = norm_uuid.values[0] - rate_diff, n_det = self.rate(pop, survey_name, norm_uuid, run) - - # Get rate weighting - self.so.df.at[row_ix, 'weight'] = rate_diff - self.so.df.at[row_ix, 'n_det'] = n_det - - pprint(f'Saving the results for run {run}') - # Best matching in terms of rates - max_w = np.nanmax(self.so.df.weight) - self.so.df.loc[self.so.df.weight == 1e3]['weight'] = max_w - self.so.save() - - def plot(self, run): - # Get data - # For each requested run - df = self.so.df - par_set = df[df.run == run].par_set.iloc[0] - - # For each parameter - for main_par in self.run_pars[par_set]: - pprint(f'Plotting {main_par}') - other_pars = [e for e in self.run_pars[par_set] if e != main_par] - - for compare_par in ['dm', 'snr']: - compare_col = f'{compare_par}_gof' - - pprint(f' - {compare_col}') - for survey, group_surv in df[df.run == run].groupby('survey'): - - pprint(f' - {survey}') - - # Set up plot - plot_aa_style() - plt.rcParams["figure.figsize"] = (5.75373*3, 5.75373*3) - plt.rcParams['figure.max_open_warning'] = 125 - n_x = group_surv[other_pars[0]].nunique() - if len(other_pars) > 1: - n_y = group_surv[other_pars[1]].nunique() - else: - n_y = 1 - fig, ax = plt.subplots(n_x, n_y, - sharex='col', sharey='row') - - groups = group_surv.groupby(other_pars) - x = -1 - for i, (other_pars_vals, group) in enumerate(groups): - bins = group[main_par].values - values = group[compare_col].values - bins, values = self.add_edges_to_hist(bins, values) - - if n_y > 1: - y = i % n_y - if y == 0: - x += 1 - a = ax[y, x] - else: - y = i - a = ax[y] - - a.step(bins, values, where='mid') - a.set_title = str(other_pars_vals) - - diff = np.diff(bins) - if diff[1] != diff[0]: - a.set_xscale('log') - - # Set axis label - if y == n_y - 1: - p = other_pars[0] - if isinstance(other_pars_vals, float): - val = other_pars_vals - else: - val = other_pars_vals[0] - p = p.replace('_', ' ') - a.set_xlabel(f'{p} = {val:.2}') - - if x == 0: - p = other_pars[1] - val = other_pars_vals[1] - p = p.replace('_', ' ') - a.set_ylabel(f'{p} = {val:.2}') - - # Set axis limits - subset = df[df.run == run][main_par] - y_subset = group_surv[compare_col].copy() - try: - low = np.nanmin(y_subset) - high = np.nanmax(y_subset) - except ValueError: - low = 0.0001 - high = 1 - log = False - if low > 0 and high > 0: - log = True - - for a in ax.flatten(): - a.set_xlim(subset.min(), subset.max()) - if log: - a.set_yscale('log', nonposy='clip') - a.set_ylim(low, high) - - p = main_par.replace('_', ' ') - fig.suptitle(f'{p} - {compare_par} - {survey}') - plt.tight_layout() - plt.subplots_adjust(top=0.95) - - # Save to subdirectory - path_to_save = rel_path(f'./plots/mc/{main_par}_run{run}/') - if not os.path.isdir(path_to_save): - os.mkdir(path_to_save) - path_to_save += f'{compare_par}_{survey}.pdf' - plt.savefig(path_to_save) - plt.clf() - - def add_edges_to_hist(self, bins, n, bin_type='lin'): - """Add edges to histograms""" - - np.seterr(divide='ignore', invalid='ignore') - - if bin_type == 'lin': - bin_dif = np.diff(bins)[-1] - bins = np.insert(bins, 0, bins[0] - bin_dif) - bins = np.insert(bins, len(bins), bins[-1] + bin_dif) - else: - bin_dif = np.diff(np.log10(bins))[-1] - bins = np.insert(bins, 0, 10**(np.log10(bins[0])-bin_dif)) - bins = np.insert(bins, len(bins), 10**(np.log10(bins[-1])+bin_dif)) - - n = np.insert(n, 0, np.nan) - n = np.insert(n, len(n), np.nan) - - return bins, n - - def weighted_median(self, df): - dm_gof = df['dm_gof'].values - dm_weight = df['n_det'].values - snr_gof = df['snr_gof'].values - snr_weight = df['n_det'].values - gofs = np.concatenate([dm_gof, snr_gof]) - weights = np.concatenate([dm_weight, snr_weight]) - return median(gofs, weights) - - def calc_global_max(self, run): - self.so = SimulationOverview() - df = self.so.df[self.so.df.run == run] - par_set = df[df.run == run].par_set.iloc[0] - cols = self.run_pars[par_set] - values = [] - gofs = [] - - # Loop through all combination of parameters - for value, group in df.groupby(cols): - gof = self.weighted_median(group) - values.append(value) - gofs.append(gof) - - gofs = np.array(gofs) - - # Find maximum (best gof) - if np.isnan(gofs).all(): - return dict(zip(cols, [(np.nan, np.nan) for i in cols])) - else: - best_ix = np.nanargmax(gofs) - best_values = values[best_ix] - best_gofs = [gofs[best_ix]]*len(cols) - - return dict(zip(cols, zip(best_values, best_gofs))) diff --git a/tests/monte_carlo/plot.py b/tests/monte_carlo/plot.py deleted file mode 100644 index 11d80dd..0000000 --- a/tests/monte_carlo/plot.py +++ /dev/null @@ -1,400 +0,0 @@ -from frbpoppy import hist -from goodness_of_fit import GoodnessOfFit -from matplotlib.lines import Line2D -from scipy.optimize import curve_fit -from tests.convenience import plot_aa_style, rel_path -import matplotlib.pyplot as plt -import numpy as np - - -class Plot(): - """Plot runs.""" - - def __init__(self): - plot_aa_style() - plt.rcParams['figure.figsize'] = (5.75373, (5.75373/3)*4) - plt.rcParams['font.size'] = 9 - # plt.rcParams['xtick.major.pad'] = 10 - # plt.rcParams['ytick.major.pad'] = 10 - # plt.rcParams['axes.titlepad'] = 10 - self.fig, self.axes = plt.subplots(4, 3, sharey='row') - self.colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] - self.gf = GoodnessOfFit() - self.df = self.gf.so.df - - # Calculate global maximums - self.gm = {} - for run in self.df.run.unique(): - self.gm[run] = self.gf.calc_global_max(run) - print(self.gm) - - # Plot various subplots - self.alpha() - self.si() - self.li() - self.li_2() - self.lum_min() - self.lum_max() - self.w_int_mean() - self.w_int_std() - self.legend() - self.dm_igm_slope() - self.dm_host() - self.axes[3, 2].set_axis_off() - - # Plot run rectangles - # self.runs() - - # Save plot - plt.tight_layout() # rect=[0, 0, 0.98, 1]) - plt.subplots_adjust(wspace=0.1) - plt.savefig(rel_path('./plots/mc/mc.pdf')) - - def alpha(self): - ax = self.axes[0, 0] - parm = 'alpha' - label = r'$\alpha$' - runs = [1, 5, 8] - - ax.set_yscale('log', nonposy='clip') - ax.set_ylabel(r'GoF') - ax.set_xlabel(label) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i==1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.1f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def si(self): - ax = self.axes[0, 1] - parm = 'si' - label = r'\text{si}' - runs = [1, 5, 8] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.1f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def li(self): - ax = self.axes[0, 2] - parm = 'li' - # label = r'\text{lum$_{\text{i}}$}' - label = 'li' - runs = [1, 5, 8] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label) - ax_right = ax.twinx() - ax_right.set_ylabel('Set 1', labelpad=10) - ax_right.tick_params(axis='y', which='both', right=False, labelright=False) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.1f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def li_2(self): - ax = self.axes[1, 0] - ax.set_ylabel(r'GoF') - parm = 'li' - label = 'li' - runs = [2] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - # if not np.isnan(best_value): - # title = fr'{label}=${best_value:.1f}$' - # ax.set_title(title, fontsize=10, color=self.colors[i]) - - def lum_min(self): - ax = self.axes[1, 1] - ax.set_xscale('log') - label = r'\text{lum$_{\text{min}}$}' - parm = 'lum_min' - runs = [2] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label + r' (erg s$^{-1}$)') - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - # if not np.isnan(best_value): - # title = fr'{label}=${best_value:.1e}$' - # ax.set_title(title, fontsize=10, color=self.colors[i]) - - def lum_max(self): - ax = self.axes[1, 2] - ax.set_xscale('log') - label = r'\text{lum$_{\text{max}}$}' - parm = 'lum_max' - runs = [2] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label + r' (erg s$^{-1}$)') - ax_right = ax.twinx() - ax_right.set_ylabel('Set 2', labelpad=10) - ax_right.tick_params(axis='y', which='both', right=False, labelright=False) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - # if not np.isnan(best_value): - # title = fr'{label}=${best_value:.1e}$' - # ax.set_title(title, fontsize=10, color=self.colors[i]) - - def w_int_mean(self): - ax = self.axes[2, 0] - ax.set_xscale('log') - ax.set_ylabel(r'GoF') - label = r'\text{w$_{\text{int, mean}}$}' - parm = 'w_mean' - runs = [3, 6, 9] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(fr'{label} (ms)') - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.1e}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def w_int_std(self): - ax = self.axes[2, 1] - label = r'\text{w$_{\text{int, std}}$}' - parm = 'w_std' - runs = [3, 6, 9] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(fr'{label} (ms)') - ax_right = ax.twinx() - ax_right.set_ylabel('Set 3', labelpad=10) - ax_right.tick_params(axis='y', which='both', right=False, labelright=False) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.1f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def dm_igm_slope(self): - ax = self.axes[3, 0] - ax.set_ylabel(r'GoF') - label = r'\text{DM$_{\text{IGM, slope}}$}' - parm = 'dm_igm_slope' - runs = [4, 7, 10] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label + r' ($\textrm{pc}\ \textrm{cm}^{-3}$)') - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.0f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def dm_host(self): - ax = self.axes[3, 1] - label = r'\text{DM$_{\text{Host}}$}' - parm = 'dm_host' - runs = [4, 7, 10] - - ax.set_yscale('log', nonposy='clip') - ax.set_xlabel(label + r' ($\textrm{pc}\ \textrm{cm}^{-3}$)') - ax_right = ax.twinx() - ax_right.set_ylabel('Set 4', labelpad=10) - ax_right.tick_params(axis='y', which='both', right=False, labelright=False) - best_value = np.nan - - # Plot runs - for i, run in enumerate(runs): - bins, gofs = self.get_data(run, parm) - ax.step(bins, gofs, where='mid') - - # Plot global maximum - try: - best_value, best_gof = self.gm[run][parm] - ax.plot([best_value]*2, [1e-1, best_gof], color=self.colors[i], linestyle='--') - ax.scatter([best_value], [best_gof], marker='x', color=self.colors[i]) - except KeyError: - i -= 1 - continue - - if i == 1 and not np.isnan(best_value): - title = fr'{label}=${best_value:.0f}$' - ax.set_title(title, fontsize=10, color=self.colors[i]) - - def legend(self): - ax = self.axes[2, 2] - - # Add legend elements - elements = [] - - line = Line2D([0], [0], color=self.colors[0]) - elements.append((line, r'Cycle 1')) - - line = Line2D([0], [0], color=self.colors[1]) - elements.append((line, r'Cycle 2')) - - line = Line2D([0], [0], color=self.colors[2]) - elements.append((line, r'Cycle 3')) - - line = Line2D([0], [0], color='grey', linestyle='--') - label = r'Max GoF' - elements.append((line, label)) - - lines, labels = zip(*elements) - self.fig.legend(lines, labels, bbox_to_anchor=(0.84, 0.4), - loc='center') - - ax.set_axis_off() - - def get_data(self, run_number, par): - df = self.df[self.df.run == run_number] - if df.empty: - return [np.nan], [np.nan] - gofs = [] - bins = [] - for bin_val, group in df.groupby(par): - gof = self.gf.weighted_median(group) - gofs.append(gof) - bins.append(bin_val) - bins = np.array(bins) - gofs = np.array(gofs) - diff = np.diff(bins) - bin_type = 'lin' - if not np.isclose(diff[0], diff[1]): - bin_type = 'log' - bins, gofs = self.gf.add_edges_to_hist(bins, gofs, bin_type=bin_type) - gofs[np.isnan(gofs)] = 1e-1 - return bins, gofs - - -if __name__ == '__main__': - Plot() diff --git a/tests/monte_carlo/plot_run.py b/tests/monte_carlo/plot_run.py deleted file mode 100644 index 1283f6f..0000000 --- a/tests/monte_carlo/plot_run.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Visualise run.""" -import matplotlib.pyplot as plt -import numpy as np -from mpl_toolkits.mplot3d import Axes3D - -from simulations import SimulationOverview -from goodness_of_fit import GoodnessOfFit - -RUN = 5 -run_pars = {1: ['alpha', 'si', 'li'], - 2: ['lum_min', 'lum_max', 'li'], - 3: ['w_mean', 'w_std'], - 4: ['dm_igm_slope', 'dm_host']} - -so = SimulationOverview() -df = so.df[so.df.run == RUN].copy() -par_set = df.par_set.iloc[0] -cols = run_pars[par_set] - -# Group together survey results -gofs = {c: [] for c in cols} -gofs['gof'] = [] -gf = GoodnessOfFit() -for vals, group in df.groupby(cols): - gofs['gof'].append(gf.weighted_median(group)) - for i, c in enumerate(cols): - gofs[c].append(vals[i]) - -fig = plt.figure() -ax = fig.add_subplot(111, projection='3d') - -# Log business -labels = [] -for col in cols: - diff = np.diff(np.sort(list(set(gofs[col])))) - if not np.isclose(diff[0], diff[-1]): - gofs[col] = np.log10(gofs[col]) - label = f'log {col}' - else: - label = col - labels.append(label) - -x = gofs[cols[0]] -y = gofs[cols[1]] -colours = np.array(gofs['gof']) -if len(cols) == 2: - z = 0 -else: - z = gofs[cols[2]] - ax.set_zlabel(labels[2]) - -p = ax.scatter(x, y, z, c=colours, s=20*10**colours) -cbar = fig.colorbar(p, orientation='horizontal', - cax=fig.add_axes([0.05, 0.9, 0.2, 0.03])) -cbar.ax.set_ylabel('GoF') #, rotation=-90, va="bottom") - -xlabel = labels[0] -ylabel = labels[1] - -# ugly, but quick -if xlabel == 'alpha': - xlabel = r'$\alpha$' -if xlabel == 'log lum_min': - xlabel = r'lum$_{\rm min}$' -if xlabel == 'log lum_max': - xlabel = r'lum$_{\rm max}$' -if ylabel == 'log lum_min': - ylabel = r'lum$_{\rm min}$' -if ylabel == 'log lum_max': - ylabel = r'lum$_{\rm max}$' -ax.set_xlabel(xlabel) -ax.set_ylabel(ylabel) - -plt.tight_layout() -plt.show() diff --git a/tests/monte_carlo/run_mc.py b/tests/monte_carlo/run_mc.py deleted file mode 100644 index 01c4adf..0000000 --- a/tests/monte_carlo/run_mc.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Link together all classes to run a full Monte Carlo.""" -import pandas as pd -import numpy as np -import frbpoppy.paths -import os - -from simulations import MonteCarlo, POP_SIZE -from goodness_of_fit import GoodnessOfFit -from plot import Plot - -GENERATE = True -CALC_GOFS = True -RUNS = [10] - - -class RunOverview: - """Gather input for each run.""" - - def __init__(self, load_csv=True): - p = frbpoppy.paths.populations() - self.filename = f'{p}mc/run_overview.csv' - - if load_csv and os.path.isfile(self.filename): - self.df = self.load() - else: - self.df = self.gen_runs() - - def gen_run(self): - return {'alpha': None, - 'si': None, - 'li': None, - 'lum_min': None, - 'lum_max': None, - 'w_mean': None, - 'w_std': None, - 'dm_igm_slope': None, - 'dm_host': None, - 'execute': True, - 'par_set': 0, - 'run': 0} - - def gen_runs(self): - runs = [] - for i in range(10): - r = self.gen_run() - r['run_number'] = i + 1 - r['execute'] = True - r['par_set'] = i % 4 + 1 - if i == 9: # Holder for best values - r['execute'] = False - runs.append(r) - df = pd.DataFrame(runs) - df.set_index('run', inplace=True) - return df - - def load(self): - df = pd.read_csv(self.filename) - df.run = df.run.astype(int) - df.par_set = df.par_set.astype(int) - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] - return df - - def save(self, df=None): - if df is None: - df = self.df - df.to_csv(self.filename) - - -if __name__ == '__main__': - print('Commencing') - runs = RunOverview(load_csv=True) - mc = MonteCarlo(pop_size=POP_SIZE) - - for i, run in runs.df.iterrows(): - - run = runs.df.iloc[i] - - print('='*50) - print(f'On Run {run.run} with par_set {run.par_set}') - print('='*50) - print(run) - - if run.run not in RUNS: - continue - - # Generate parameter sets - if GENERATE: - if run.par_set == 1: - mc.gen_par_set_1(lum_min=run.lum_min, - lum_max=run.lum_max, - w_mean=run.w_mean, - w_std=run.w_std, - dm_igm_slope=run.dm_igm_slope, - dm_host=run.dm_host, - run=run.run) - if run.par_set == 2: - mc.gen_par_set_2(alpha=run.alpha, - si=run.si, - w_mean=run.w_mean, - w_std=run.w_std, - dm_igm_slope=run.dm_igm_slope, - dm_host=run.dm_host, - run=run.run) - if run.par_set == 3: - mc.gen_par_set_3(alpha=run.alpha, - si=run.si, - li=run.li, - lum_min=run.lum_min, - lum_max=run.lum_max, - dm_igm_slope=run.dm_igm_slope, - dm_host=run.dm_host, - run=run.run) - if run.par_set == 4: - mc.gen_par_set_4(alpha=run.alpha, - si=run.si, - li=run.li, - lum_min=run.lum_min, - lum_max=run.lum_max, - w_mean=run.w_mean, - w_std=run.w_std, - run=run.run) - - # Determine the goodness of fit - gf = GoodnessOfFit() - if CALC_GOFS: - gf.calc_gofs(run.run) - - # Find global maximums - gf = GoodnessOfFit() - gms = gf.calc_global_max(run.run) - print('\n') - print(f' Best fits from run {run.run}-> {gms}') - print('\n') - - # Adapt the input for future runs - for j in range(i+1, len(runs.df)): - for par in gms: - runs.df.at[j, par] = gms[par][0] - - runs.save() - - Plot() diff --git a/tests/monte_carlo/simulations.py b/tests/monte_carlo/simulations.py deleted file mode 100644 index 1aa4788..0000000 --- a/tests/monte_carlo/simulations.py +++ /dev/null @@ -1,402 +0,0 @@ -"""Run Monte Carlo simulations.""" -from joblib import Parallel, delayed -from frbpoppy import Survey, CosmicPopulation, SurveyPopulation, pprint -from datetime import datetime -from copy import deepcopy - -from glob import glob -import frbpoppy.paths -import os -import numpy as np -import pandas as pd -from tqdm import tqdm -import uuid - -POP_SIZE = 5e7 - - -class SimulationOverview: - """Given values, return uid - - Load from file, or make.""" - - def __init__(self, load_csv=True): - p = frbpoppy.paths.populations() - self.filename = f'{p}mc/simluation_overview.csv' - - if load_csv and os.path.isfile(self.filename): - self.load() - else: - self.df = pd.DataFrame() - - def load(self): - self.df = pd.read_csv(self.filename, index_col=0) - self.df = self.df.loc[:, ~self.df.columns.str.contains('^Unnamed')] - - def save(self): - self.df.to_csv(self.filename) - - def append(self, df): - self.df = self.df.append(df, ignore_index=True) - - def map_surveys(self, ix, names): - mapping = dict(zip(ix, names)) - self.df.replace({"survey": mapping}, inplace=True) - - -class MonteCarlo: - - def __init__(self, pop_size=1e2, load_csv=True): - self.survey_names = ['parkes-htru', - 'chime-frb', - 'askap-incoh', - 'wsrt-apertif'] - self.load_csv = load_csv - self.pop_size = pop_size - self.survey_ix = [i for i in range(len(self.survey_names))] - self.surveys = self.set_up_surveys() - self.so = SimulationOverview(load_csv=self.load_csv) - self.set_up_dirs() - - def set_up_surveys(self): - """Set up surveys.""" - surveys = [] - for name in self.survey_names: - survey = Survey(name=name) - survey.set_beam(model='airy', n_sidelobes=1) - if name in ('chime-frb', 'wsrt-apertif', 'parkes-htru'): - survey.set_beam(model=name) - surveys.append(survey) - return surveys - - def set_up_dirs(self, run=np.nan): - """Create subdirectory for saving populations. - - Returns True if directory had to be set up.""" - f = f'{frbpoppy.paths.populations()}mc/' - if not os.path.isdir(f): - os.mkdir(f) - return True - - if not np.isnan(run): - f = f'{frbpoppy.paths.populations()}mc/run_{run}/' - if not os.path.isdir(f): - os.mkdir(f) - return True - - return False - - def gen_par_set_1(self, - parallel=True, - lum_min=np.nan, - lum_max=np.nan, - w_mean=np.nan, - w_std=np.nan, - dm_igm_slope=np.nan, - dm_host=np.nan, - run=0): - alphas = np.linspace(-2.5, -1, 11) - sis = np.linspace(-2, 2, 11) - lis = np.linspace(-2, 0, 11) - - # Put all options into a dataframe - if 'run' in self.so.df: - self.so.df = self.so.df[self.so.df.run != run] - opt = np.meshgrid(alphas, sis, lis, self.survey_ix) - options = np.array(opt).T.reshape(-1, 4) - df = pd.DataFrame(options, columns=('alpha', 'si', 'li', 'survey')) - df['run'] = run - df['par_set'] = 1 - df['uuid'] = [uuid.uuid4() for _ in range(len(df.index))] - df['date'] = datetime.today() - self.so.append(df) - self.so.map_surveys(self.survey_ix, self.survey_names) - self.so.save() - - # Remove previous par_set of the same number - if not self.set_up_dirs(run=run): - fs = f'{frbpoppy.paths.populations()}mc/run_{run}/*' - for f in glob(fs): - os.remove(f) - - def iter_alpha(i): - alpha = alphas[i] - pop = CosmicPopulation.complex(self.pop_size) - - pop.set_dist(model='vol_co', z_max=1.0, alpha=alpha) - pop.set_lum(model='constant', value=1) - - if not np.isnan(w_mean): - pop.set_w(model='lognormal', mean=w_mean, std=w_std) - if not np.isnan(dm_igm_slope): - pop.set_dm_igm(model='ioka', slope=dm_igm_slope) - pop.set_dm_host(model='constant', value=dm_host) - - pop.generate() - - for si in sis: - pop.set_si(model='constant', value=si) - pop.gen_si() - - for li in lis: - pop.set_lum(model='powerlaw', - low=1e40, - high=1e45, power=li) - - if not np.isnan(lum_min): - pop.set_lum(model='powerlaw', low=lum_min, - high=lum_max, index=li) - - pop.gen_lum() - - for survey in self.surveys: - surv_pop = SurveyPopulation(pop, survey) - - # Get unique identifier - mask = (self.so.df.par_set == 1) - mask &= (self.so.df.run == run) - mask &= (self.so.df.alpha == alpha) - mask &= (self.so.df.si == si) - mask &= (self.so.df.li == li) - mask &= (self.so.df.survey == survey.name) - uuid = self.so.df[mask].uuid.iloc[0] - surv_pop.name = f'mc/run_{run}/{uuid}' - surv_pop.save() - - if parallel: - n_cpu = min([3, os.cpu_count() - 1]) - pprint(f'{os.cpu_count()} CPUs available') - r = range(len(alphas)) - Parallel(n_jobs=n_cpu)(delayed(iter_alpha)(i) for i in tqdm(r)) - else: - [iter_alpha(i) for i in tqdm(range(len(alphas)))] - - def gen_par_set_2(self, - parallel=True, - alpha=-1.5, - si=0, - w_mean=np.nan, - w_std=np.nan, - dm_igm_slope=np.nan, - dm_host=np.nan, - run=np.nan): - lis = np.linspace(-1.5, 0, 11) - lum_mins = 10**np.linspace(38, 46, 11) - lum_maxs = 10**np.linspace(38, 46, 11) - - # Put all options into a dataframe - self.so.df = self.so.df[self.so.df.run != run] - opt = np.meshgrid(lis, lum_mins, lum_maxs, self.survey_ix) - options = np.array(opt).T.reshape(-1, 4) - cols = ('li', 'lum_min', 'lum_max', 'survey') - df = pd.DataFrame(options, columns=cols) - df['par_set'] = 2 - df['run'] = run - df['uuid'] = [uuid.uuid4() for _ in range(len(df.index))] - df['date'] = datetime.today() - df = df[~(df.lum_max < df.lum_min)] - self.so.append(df) - self.so.map_surveys(self.survey_ix, self.survey_names) - self.so.save() - - # Remove previous par_set of the same number - if not self.set_up_dirs(run=run): - fs = f'{frbpoppy.paths.populations()}mc/run_{run}/*' - for f in glob(fs): - os.remove(f) - - pop = CosmicPopulation.complex(self.pop_size) - if not np.isnan(alpha): - pop.set_dist(model='vol_co', z_max=1.0, alpha=alpha) - pop.set_si(model='constant', value=si) - pop.set_lum(model='constant', value=1) - if not np.isnan(w_mean): - pop.set_w(model='lognormal', mean=w_mean, std=w_std) - if not np.isnan(dm_igm_slope): - pop.set_dm_igm(model='ioka', slope=dm_igm_slope) - pop.set_dm_host(model='constant', value=dm_host) - - pop.generate() - - def adapt_pop(e): - li, lum_min, lum_max = e - if lum_max < lum_min: - return - t_pop = deepcopy(pop) - t_pop.set_lum(model='powerlaw', low=lum_min, high=lum_max, - power=li) - t_pop.gen_lum() - - for survey in self.surveys: - surv_pop = SurveyPopulation(t_pop, survey) - - # Get unique identifier - mask = (self.so.df.par_set == 2) - mask &= (self.so.df.run == run) - mask &= (self.so.df.li == li) - mask &= (self.so.df.lum_min == lum_min) - mask &= (self.so.df.lum_max == lum_max) - mask &= (self.so.df.survey == survey.name) - uuid = self.so.df[mask].uuid.iloc[0] - surv_pop.name = f'mc/run_{run}/{uuid}' - surv_pop.save() - - n_cpu = min([3, os.cpu_count() - 1]) - pprint(f'{os.cpu_count()} CPUs available') - mg = np.meshgrid(lis, lum_mins, lum_maxs) - loop = np.array(mg).T.reshape(-1, 3) - if parallel: - Parallel(n_jobs=n_cpu)(delayed(adapt_pop)(e) for e in tqdm(loop)) - else: - [adapt_pop(e) for e in tqdm(loop)] - - def gen_par_set_3(self, - parallel=True, - alpha=-1.5, - si=0, - li=-1, - lum_min=1e40, - lum_max=1e40, - dm_igm_slope=np.nan, - dm_host=np.nan, - run=np.nan): - w_means = 10**np.linspace(-3, 1, 11) - w_stds = np.linspace(0, 3, 11) - - # Put all options into a dataframe - self.so.df = self.so.df[self.so.df.run != run] - opt = np.meshgrid(w_means, w_stds, self.survey_ix) - options = np.array(opt).T.reshape(-1, 3) - cols = ('w_mean', 'w_std', 'survey') - df = pd.DataFrame(options, columns=cols) - df['run'] = run - df['par_set'] = 3 - df['uuid'] = [uuid.uuid4() for _ in range(len(df.index))] - df['date'] = datetime.today() - self.so.append(df) - self.so.map_surveys(self.survey_ix, self.survey_names) - self.so.save() - - # Remove previous par_set of the same number - if not self.set_up_dirs(run=run): - fs = f'{frbpoppy.paths.populations()}mc/run_{run}/*' - for f in glob(fs): - os.remove(f) - - pop = CosmicPopulation.complex(self.pop_size) - - if not np.isnan(alpha): - pop.set_dist(model='vol_co', z_max=1.0, alpha=alpha) - pop.set_si(model='constant', value=si) - if not np.isnan(lum_min): - pop.set_lum(model='powerlaw', low=lum_min, high=lum_max, index=li) - if not np.isnan(dm_igm_slope): - pop.set_dm_igm(model='ioka', slope=dm_igm_slope) - pop.set_dm_host(model='constant', value=dm_host) - - pop.generate() - - def adapt_pop(e): - w_mean, w_std = e - t_pop = deepcopy(pop) - t_pop.set_w(model='lognormal', mean=w_mean, std=w_std) - t_pop.gen_w() - - for survey in self.surveys: - surv_pop = SurveyPopulation(t_pop, survey) - - # Get unique identifier - mask = (self.so.df.par_set == 3) - mask &= (self.so.df.run == run) - mask &= (self.so.df.run == run) - mask &= (self.so.df.w_mean == w_mean) - mask &= (self.so.df.w_std == w_std) - mask &= (self.so.df.survey == survey.name) - uuid = self.so.df[mask].uuid.iloc[0] - surv_pop.name = f'mc/run_{run}/{uuid}' - surv_pop.save() - - n_cpu = min([3, os.cpu_count() - 1]) - pprint(f'{os.cpu_count()} CPUs available') - mg = np.meshgrid(w_means, w_stds) - loop = np.array(mg).T.reshape(-1, 2) - if parallel: - Parallel(n_jobs=n_cpu)(delayed(adapt_pop)(e) for e in tqdm(loop)) - else: - [adapt_pop(e) for e in tqdm(loop)] - - def gen_par_set_4(self, - parallel=True, - alpha=-1.5, - si=0, - li=-1, - lum_min=1e40, - lum_max=1e40, - w_mean=np.nan, - w_std=np.nan, - run=np.nan): - dm_igm_slopes = np.linspace(800, 1200, 11) - dm_hosts = np.linspace(0, 500, 11) - - # Put all options into a dataframe - self.so.df = self.so.df[self.so.df.run != run] - opt = np.meshgrid(dm_igm_slopes, dm_hosts, self.survey_ix) - options = np.array(opt).T.reshape(-1, 3) - cols = ('dm_igm_slope', 'dm_host', 'survey') - df = pd.DataFrame(options, columns=cols) - df['run'] = run - df['par_set'] = 4 - df['uuid'] = [uuid.uuid4() for _ in range(len(df.index))] - df['date'] = datetime.today() - self.so.append(df) - self.so.map_surveys(self.survey_ix, self.survey_names) - self.so.save() - - # Remove previous par_set of the same number - if not self.set_up_dirs(run=run): - fs = f'{frbpoppy.paths.populations()}mc/run_{run}/*' - for f in glob(fs): - os.remove(f) - - pop = CosmicPopulation.complex(self.pop_size) - - if not np.isnan(alpha): - pop.set_dist(model='vol_co', z_max=1.0, alpha=alpha) - pop.set_si(model='constant', value=si) - if not np.isnan(lum_min): - pop.set_lum(model='powerlaw', low=lum_min, high=lum_max, index=li) - if not np.isnan(w_mean): - pop.set_w(model='lognormal', mean=w_mean, std=w_std) - pop.generate() - - def adapt_pop(e): - dm_igm_slope, dm_host = e - t_pop = deepcopy(pop) - t_pop.set_dm_igm(model='ioka', slope=dm_igm_slope) - t_pop.gen_dm_igm() - t_pop.set_dm_host(model='constant', value=dm_host) - t_pop.gen_dm_host() - t_pop.frbs.dm = t_pop.frbs.dm_mw + t_pop.frbs.dm_igm - t_pop.frbs.dm += t_pop.frbs.dm_host - - for survey in self.surveys: - surv_pop = SurveyPopulation(t_pop, survey) - - # Get unique identifier - mask = (self.so.df.par_set == 4) - mask &= (self.so.df.run == run) - mask &= (self.so.df.dm_igm_slope == dm_igm_slope) - mask &= (self.so.df.dm_host == dm_host) - mask &= (self.so.df.survey == survey.name) - uuid = self.so.df[mask].uuid.iloc[0] - surv_pop.name = f'mc/run_{run}/{uuid}' - surv_pop.save() - - n_cpu = min([4, os.cpu_count() - 1]) - pprint(f'{os.cpu_count()} CPUs available') - mg = np.meshgrid(dm_igm_slopes, dm_hosts) - loop = np.array(mg).T.reshape(-1, 2) - if parallel: - Parallel(n_jobs=n_cpu)(delayed(adapt_pop)(e) for e in tqdm(loop)) - else: - [adapt_pop(e) for e in tqdm(loop)] diff --git a/tests/monte_carlo/weighted_quantiles.py b/tests/monte_carlo/weighted_quantiles.py deleted file mode 100644 index 136b5d4..0000000 --- a/tests/monte_carlo/weighted_quantiles.py +++ /dev/null @@ -1,83 +0,0 @@ -import numpy as np - - -def quantile_1D(data, weights, quantile): - """ - Compute the weighted quantile of a 1D numpy array. - Parameters - ---------- - data : ndarray - Input array (one dimension). - weights : ndarray - Array with the weights of the same size of `data`. - quantile : float - Quantile to compute. It must have a value between 0 and 1. - Returns - ------- - quantile_1D : float - The output value. - """ - # Check the data - if not isinstance(data, np.matrix): - data = np.asarray(data) - if not isinstance(weights, np.matrix): - weights = np.asarray(weights) - nd = data.ndim - if nd != 1: - raise TypeError("data must be a one dimensional array") - ndw = weights.ndim - if ndw != 1: - raise TypeError("weights must be a one dimensional array") - if data.shape != weights.shape: - raise TypeError("the length of data and weights must be the same") - if ((quantile > 1.) or (quantile < 0.)): - raise ValueError("quantile must have a value between 0. and 1.") - # Sort the data - ind_sorted = np.argsort(data) - sorted_data = data[ind_sorted] - sorted_weights = weights[ind_sorted] - # Compute the auxiliary arrays - Sn = np.cumsum(sorted_weights) - # TODO: Check that the weights do not sum zero - # assert Sn != 0, "The sum of the weights must not be zero" - Pn = (Sn-0.5*sorted_weights)/Sn[-1] - # Get the value of the weighted median - return np.interp(quantile, Pn, sorted_data) - - -def quantile(data, weights, quantile): - """ - Weighted quantile of an array with respect to the last axis. - Parameters - ---------- - data : ndarray - Input array. - weights : ndarray - Array with the weights. It must have the same size of the last - axis of `data`. - quantile : float - Quantile to compute. It must have a value between 0 and 1. - Returns - ------- - quantile : float - The output value. - """ - # TODO: Allow to specify the axis - nd = data.ndim - if nd == 0: - TypeError("data must have at least one dimension") - elif nd == 1: - return quantile_1D(data, weights, quantile) - elif nd > 1: - n = data.shape - imr = data.reshape((np.prod(n[:-1]), n[-1])) - result = np.apply_along_axis(quantile_1D, -1, imr, weights, quantile) - return result.reshape(n[:-1]) - - -def median(data, weights): - """ - Weighted median of an array with respect to the last axis. - Alias for `quantile(data, weights, 0.5)`. - """ - return quantile(data, weights, 0.5)