Figure 12 & 13: Gaussianity of residual noise#
1. Imports#
[1]:
import pickle
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import cmocean
color_map = cmocean.cm.ice
from applefy.utils.file_handling import open_fits
from applefy.gaussianity.residual_tests import extract_circular_annulus, gaussian_r2
from fours.utils.data_handling import read_fours_root_dir
2. Contrast curves#
We need the contrast curves to determine the optimal choice of the method hyper-parameters for each dataset and separation from the star.
(PCA components, 4S - \(\lambda\))
[2]:
root_dir = Path(read_fours_root_dir())
fake_planet_experiments_dir = root_dir / Path("70_results/x1_fake_planet_experiments/")
Data in the FOURS_ROOT_DIR found. Location: /fast/mbonse/s4
[3]:
contrast_curves_file = fake_planet_experiments_dir / Path("contrast_curves/")
[4]:
s4_keys = [
"s4_median_lambda_000100",
"s4_median_lambda_001000",
"s4_median_lambda_010000",
"s4_median_lambda_100000",
"s4_mean_lambda_000100",
"s4_mean_lambda_001000",
"s4_mean_lambda_010000",
"s4_mean_lambda_100000",
]
[5]:
pca_results = dict()
s4_results = dict()
for tmp_file in contrast_curves_file.iterdir():
if not tmp_file.name.endswith(".pkl"):
continue
tmp_datset_name = tmp_file.name[:-5]
with open(tmp_file, 'rb') as f:
tmp_data = pickle.load(f)
# merge all data
tmp_result_table = pd.concat(tmp_data.values(), keys=tmp_data.keys(), axis=1)
pca_columns = tmp_result_table.columns.get_level_values(0).difference(s4_keys)
# select results for PCA and S4
pca_results[tmp_datset_name] = tmp_result_table[pca_columns]
s4_results[tmp_datset_name] = tmp_result_table[s4_keys]
# round the separation
pca_results[tmp_datset_name].index = np.round(pca_results[tmp_datset_name].index, 2)
s4_results[tmp_datset_name].index = np.round(s4_results[tmp_datset_name].index, 2)
3. Extract residual noise for two separations#
[6]:
# Note: Two datasets are removed from the list due to oversaturation close to the star
datasets = {
"HD2262_305_199_C-0065_C": "#1 (HD 2262)",
"HD7570_331_1101_C-0092_C": "#2 (HD 7570)",
"HD11171_332_1101_C-0092_C": "#3 (HD 11171)",
"HD22049_351_096_C-0679_A": "#4 (HD 22049)",
"HD22049_303_199_C-0065_C": "#5 (HD 22049)",
"HD38678_331_084_C-0396_A": "#6 (HD 38678)",
"HD40136_333_1101_C-0092_C": "#7 (HD 40136)",
"HD115892_143_1101_C-0092_E": "#8 (HD 115892)",
"HD169022_140_1101_C-0092_E": "#9 (HD 169022)",
}
[7]:
def get_residual_noise(
result_dict, # PCA or 4S
dataset_name,
separation,
annulus_width=2.0):
# 1.) Select dataset and find best hyperparameters
result_table = result_dict[dataset_name]
tmp_best_hyperparameters = result_table.idxmax(axis=1)[separation][0]
print("The best hyper-parameter for " + dataset_name + ":")
print(tmp_best_hyperparameters)
# 2.) load the residual
tmp_residual_file = fake_planet_experiments_dir / Path(dataset_name + \
"_/residuals/" + tmp_best_hyperparameters + "/residual_ID_0000.fits")
residual_frame = open_fits(tmp_residual_file)
# 3.) Extract the noise
_, positions, mask = extract_circular_annulus(
input_residual_frame=residual_frame,
separation=separation,
size_resolution_elements=3.6, # fwhm = 3.6
annulus_width=annulus_width)
selected_noise = residual_frame*mask
return selected_noise
[8]:
def get_r2_values(
result_dict, # PCA or 4S
dataset_name,
separation,
annulus_width=2.0):
local_noise = get_residual_noise(
result_dict,
dataset_name,
separation,
annulus_width)
local_noise = local_noise[local_noise!=0]
return gaussian_r2(local_noise, fit_method="theil sen")
[9]:
def get_r2_sep(separation, width):
all_results = []
all_names = []
dataset_keys = list(datasets.keys())
for tmp_dataset, tmp_name in datasets.items():
tmp_pca_r2 = get_r2_values(pca_results, tmp_dataset, separation, width)
tmp_s4_r2 = get_r2_values(s4_results, tmp_dataset, separation, width)
all_results.append((tmp_pca_r2, tmp_s4_r2))
all_names.append(tmp_name)
return np.array(all_results), all_names
4. Create the first plot#
[10]:
all_results_inner, names = get_r2_sep(3.5, 2.0)
all_results_outer, names = get_r2_sep(6.0, 2.0)
The best hyper-parameter for HD2262_305_199_C-0065_C:
stacked_05_PCA_050_components
The best hyper-parameter for HD2262_305_199_C-0065_C:
s4_mean_lambda_001000
The best hyper-parameter for HD7570_331_1101_C-0092_C:
stacked_05_PCA_020_components
The best hyper-parameter for HD7570_331_1101_C-0092_C:
s4_mean_lambda_010000
The best hyper-parameter for HD11171_332_1101_C-0092_C:
stacked_05_PCA_050_components
The best hyper-parameter for HD11171_332_1101_C-0092_C:
s4_median_lambda_001000
The best hyper-parameter for HD22049_351_096_C-0679_A:
stacked_05_PCA_080_components
The best hyper-parameter for HD22049_351_096_C-0679_A:
s4_mean_lambda_000100
The best hyper-parameter for HD22049_303_199_C-0065_C:
stacked_05_PCA_070_components
The best hyper-parameter for HD22049_303_199_C-0065_C:
s4_mean_lambda_000100
The best hyper-parameter for HD38678_331_084_C-0396_A:
stacked_05_PCA_070_components
The best hyper-parameter for HD38678_331_084_C-0396_A:
s4_median_lambda_001000
The best hyper-parameter for HD40136_333_1101_C-0092_C:
stacked_05_PCA_050_components
The best hyper-parameter for HD40136_333_1101_C-0092_C:
s4_median_lambda_001000
The best hyper-parameter for HD115892_143_1101_C-0092_E:
stacked_05_PCA_080_components
The best hyper-parameter for HD115892_143_1101_C-0092_E:
s4_mean_lambda_001000
The best hyper-parameter for HD169022_140_1101_C-0092_E:
stacked_05_PCA_016_components
The best hyper-parameter for HD169022_140_1101_C-0092_E:
s4_mean_lambda_000100
The best hyper-parameter for HD2262_305_199_C-0065_C:
stacked_05_PCA_070_components
The best hyper-parameter for HD2262_305_199_C-0065_C:
s4_mean_lambda_100000
The best hyper-parameter for HD7570_331_1101_C-0092_C:
stacked_05_PCA_070_components
The best hyper-parameter for HD7570_331_1101_C-0092_C:
s4_mean_lambda_100000
The best hyper-parameter for HD11171_332_1101_C-0092_C:
stacked_05_PCA_050_components
The best hyper-parameter for HD11171_332_1101_C-0092_C:
s4_median_lambda_010000
The best hyper-parameter for HD22049_351_096_C-0679_A:
stacked_05_PCA_250_components
The best hyper-parameter for HD22049_351_096_C-0679_A:
s4_mean_lambda_001000
The best hyper-parameter for HD22049_303_199_C-0065_C:
stacked_05_PCA_200_components
The best hyper-parameter for HD22049_303_199_C-0065_C:
s4_median_lambda_000100
The best hyper-parameter for HD38678_331_084_C-0396_A:
stacked_05_PCA_140_components
The best hyper-parameter for HD38678_331_084_C-0396_A:
s4_mean_lambda_001000
The best hyper-parameter for HD40136_333_1101_C-0092_C:
stacked_05_PCA_160_components
The best hyper-parameter for HD40136_333_1101_C-0092_C:
s4_median_lambda_000100
The best hyper-parameter for HD115892_143_1101_C-0092_E:
stacked_05_PCA_120_components
The best hyper-parameter for HD115892_143_1101_C-0092_E:
s4_median_lambda_000100
The best hyper-parameter for HD169022_140_1101_C-0092_E:
stacked_05_PCA_050_components
The best hyper-parameter for HD169022_140_1101_C-0092_E:
s4_median_lambda_000100
[11]:
fig, axis = plt.subplots(1, 1, figsize=(8, 5.5))
offset = 0.15 # scatter inner and outer points
# Inner results
y_values_inner = np.arange(len(datasets))
axis.scatter(all_results_inner[::-1, 0], y_values_inner + offset,
color="navy", marker="s", s=80,
label="optimal PCA ($2.5 - 4.5 \lambda /D$)")
axis.scatter(all_results_inner[::-1, 1], y_values_inner + offset,
color="orange", marker="s", s=80,
label="4S ($2.5 - 4.5 \lambda /D$)")
# Outer results
y_values_outer = np.arange(len(datasets))
axis.scatter(all_results_outer[::-1, 0], y_values_outer - offset,
color="navy", marker=".", s=120,
label="optimal PCA ($5.0 - 7.0 \lambda /D$)")
axis.scatter(all_results_outer[::-1, 1], y_values_outer - offset,
color="orange", marker=".", s=120,
label="4S ($5.0 - 7.0 \lambda /D$)")
# Add Labels and titles
axis.set_xlabel("Gaussianity [$R^2$]", size=18,)
axis.set_ylabel("Dataset", size=18, labelpad=10)
axis.set_title("Gaussianity of residual noise", fontsize=20, fontweight="bold", y=1.04)
# Add legend
leg1 = axis.legend(
bbox_to_anchor=(0.45, -0.36),
fontsize=14, ncol=2,
loc='lower center')
# Change tick size
axis.set_yticks(np.arange(len(datasets)), names[::-1])
axis.tick_params(
axis='x', which='major', labelsize=13)
axis.tick_params(
axis='y', which='major', labelsize=13)
# Set limit
axis.set_xlim(0.95, 1.0)
axis.hlines(np.arange(9), 0.95, 1.0, color="lightgray", alpha=0.5, lw=25, zorder=0)
fig.patch.set_facecolor('white')
plt.savefig("./final_plots/0a2_gaussianity_residual_noise.pdf", bbox_inches='tight')
5. Create a Plot with Examples#
[12]:
example_1 = "HD169022_140_1101_C-0092_E"
example_2 = "HD22049_303_199_C-0065_C"
[13]:
def plot_QQ(axis_in,
noise_pixel,
color,
label_name=None,
add_r2=True):
# unpack the results
r2, tmp_linear_model, gaussian_samples = gaussian_r2(
noise_samples=noise_pixel,
fit_method="theil sen",
return_fit=True)
# Plot the Q-Q points
axis_in.scatter(sorted(gaussian_samples),
sorted(noise_pixel),
s=20,
color=color)
#axis_in.grid()
# Plot the fit model
x = np.linspace(-5, 5, 100)
axis_in.plot(x, tmp_linear_model.predict(x.reshape(-1, 1)),
'grey', lw=2, alpha=0.5)
# Set the axis limits
axis_in.set_xlim(np.min(gaussian_samples)*1.3,
np.max(gaussian_samples)*1.3)
margin = np.max([np.abs(np.min(noise_pixel)*0.2),
np.abs(np.max(noise_pixel)*0.2)])
axis_in.set_ylim(np.min(noise_pixel) - margin,
np.max(noise_pixel) + margin)
# Add additional information if needed
if add_r2:
text = r"$R^2 = $" + "%0.3f"%r2
axis_in.text(0.7, 0.08, text, ha="center",
fontsize=10,
transform = axis_in.transAxes,
bbox={"fc":"white", "ec":"white"})
# Set the axis labels and make the plot quadratic
axis_in.set_aspect(1.0/axis_in.get_data_ratio(), adjustable='box')
axis_in.set_xlabel("Expected \n normal values", fontsize=10)
axis_in.tick_params(axis='both', which='major', labelsize=8)
[14]:
def remove_image_ticks(axis_in):
axis_in.set_xticks([])
axis_in.set_yticks([])
[15]:
def plot_example(basis_in, example_name):
residual_frame_pca = get_residual_noise(
pca_results, example_name, 3.5, 2.0)
residual_frame_s4 = get_residual_noise(
s4_results, example_name, 3.5, 2.0)
gs1 = gridspec.GridSpecFromSubplotSpec(
2, 2, subplot_spec = basis_in,
hspace=0.1,
wspace=0.13)
axis_pca_qq = fig.add_subplot(gs1[1, 0])
axis_s4_qq = fig.add_subplot(gs1[1, 1])
axis_pca_residual = fig.add_subplot(gs1[0, 0])
axis_s4_residual = fig.add_subplot(gs1[0, 1])
plot_QQ(axis_pca_qq, residual_frame_pca[residual_frame_pca!=0], color="navy")
plot_QQ(axis_s4_qq, residual_frame_s4[residual_frame_s4!=0], color="orange")
axis_pca_qq.set_ylabel("Observations", fontsize=10)
axis_pca_residual.imshow(
residual_frame_pca[24:-24, 24:-24],
cmap=color_map, origin='lower')
remove_image_ticks(axis_pca_residual)
axis_s4_residual.imshow(
residual_frame_s4[24:-24, 24:-24],
cmap=color_map, origin='lower')
remove_image_ticks(axis_s4_residual)
axis_pca_residual.set_title("Optimal PCA", fontsize=11)
axis_s4_residual.set_title("4S", fontsize=12)
return axis_pca_residual, axis_s4_residual
[16]:
# 1.) Create Plot Layout
fig = plt.figure(constrained_layout=False, figsize=(12, 5.))
gs0 = fig.add_gridspec(
1, 2,
hspace=0.0,
wspace=0.15)
pca_ax, s4_ax = plot_example(gs0[0], example_1)
plot_example(gs0[1], example_2)
text1 = fig.text(0.31, 0.98, 'Gaussianity of residual noise - #9 (HD 169022)',
ha='center', va='top', fontsize=12, fontweight="bold")
text2 = fig.text(0.72, 0.98, 'Gaussianity of residual noise - #5 (HD 22049)',
ha='center', va='top', fontsize=12, fontweight="bold")
fig.patch.set_facecolor('white')
plt.savefig("./final_plots/0a1_gaussianity_residual_noise.pdf", bbox_inches='tight')
The best hyper-parameter for HD169022_140_1101_C-0092_E:
stacked_05_PCA_016_components
The best hyper-parameter for HD169022_140_1101_C-0092_E:
s4_mean_lambda_000100
The best hyper-parameter for HD22049_303_199_C-0065_C:
stacked_05_PCA_070_components
The best hyper-parameter for HD22049_303_199_C-0065_C:
s4_mean_lambda_000100