Figure 3: Histogram comparing sampling strategies#

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pmagpy.pmag as pmag
import pmagpy.ipmag as ipmag

import smpsite as smp

import warnings 
warnings.filterwarnings('ignore')

%matplotlib inline

Run simulation#

This notebook also includes how to run the simulation for the histograms.

%%time

angular_dispersio_within_site = 10 # degrees
kappa_within_site = smp.angular2kappa(angular_dispersio_within_site)
latitude = 30
outlier_rate = 0.10
n_iters = 5000

params1 = smp.Params(N=100,
                     n0=1,
                     kappa_within_site=kappa_within_site,
                     site_lat=latitude, 
                     site_long=0,
                     outlier_rate=outlier_rate,
                     secular_method="G",
                     kappa_secular=None)

params2 = smp.Params(N=20,
                     n0=5,
                     kappa_within_site=kappa_within_site,
                     site_lat=latitude, 
                     site_long=0,
                     outlier_rate=outlier_rate,
                     secular_method="G",
                     kappa_secular=None)

df_false    = smp.simulate_estimations(params1, n_iters=n_iters, ignore_outliers="False")
df_true     = smp.simulate_estimations(params2, n_iters=n_iters, ignore_outliers="True")
df_vandamme = smp.simulate_estimations(params1, n_iters=n_iters, ignore_outliers="vandamme")

df_false.to_csv("../../outputs/fig3a_df_false.csv")
df_true.to_csv("../../outputs/fig3a_df_true.csv") 
df_vandamme.to_csv("../../outputs/fig3a_df_vandamme.csv") 
CPU times: user 33min 41s, sys: 123 ms, total: 33min 41s
Wall time: 33min 41s

Figure#

# panel = 'a'
# panel = 'b'
panel = 'c'

df_false = pd.read_csv("../../outputs/fig3"+panel+"_df_false.csv")
df_true = pd.read_csv("../../outputs/fig3"+panel+"_df_true.csv")
df_vandamme = pd.read_csv("../../outputs/fig3"+panel+"_df_vandamme.csv")
df_true
Unnamed: 0 plong plat total_samples samples_per_sites S2_vgp error_angle S2_vgp_real n_tot N n0 kappa_within_site site_lat site_long outlier_rate secular_method kappa_secular ignore_outliers
0 0 190.306018 84.314629 36.0 5 218.096967 5.685371 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
1 1 55.662167 85.483652 34.0 5 215.659161 4.516348 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
2 2 193.984859 86.462183 34.0 5 265.162109 3.537817 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
3 3 358.720274 86.835100 34.0 5 461.786418 3.164900 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
4 4 224.066484 87.495521 28.0 5 206.023724 2.504479 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4995 4995 165.292979 84.319601 39.0 5 121.964481 5.680399 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
4996 4996 9.619776 87.047602 46.0 5 175.154300 2.952398 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
4997 4997 338.354021 85.822482 40.0 5 163.134369 4.177518 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
4998 4998 14.220743 87.025506 48.0 5 163.107882 2.974494 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True
4999 4999 137.642242 86.984741 41.0 5 301.022274 3.015259 191.7229 100 20 5 66.069981 30 0 0.6 G NaN True

5000 rows × 18 columns

%matplotlib inline

if panel == 'a':
    x_max = 8
    y_max = 0.52
    bw = 0.5
elif panel == 'b':
    x_max = 8
    y_max = 0.4
    bw = 0.5
elif panel == 'c':
    x_max = 14
    y_max = 0.25
    bw = 1.0

fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,8))

# Histograms
sns.histplot(df_vandamme.error_angle, ax=axes, color='#e84118', stat='density', binwidth=bw, binrange=(0,20), alpha=.5, label="Strategy 1 \n $n_0=1$, $N=100$")
sns.histplot(df_true.error_angle, ax=axes, color='#0097e6', stat='density', binwidth=bw, binrange=(0,20), alpha=.5, label="Strategy 2 \n $n_0=5$, $N=20$")

# Density plot
sns.kdeplot(df_false.error_angle, ax=axes, color='grey', alpha=.9, lw=5, label="No outlier detection \n $n_0=1$, $N=100$")


rmse1 = np.round(np.mean(df_vandamme.error_angle**2)**.5, decimals=2)
rmse2 = np.round(np.mean(df_true.error_angle**2)**.5, decimals=2)

plt.axvline(x=rmse1, ymax=0.93, c='#e84118', lw=5)
plt.axvline(x=rmse2, ymax=0.93, c='#0097e6', lw=5)

props = dict(boxstyle='round', facecolor="#e84118", alpha=0.5)
plt.text(rmse1/x_max-0.035, 0.986, "{}".format(rmse1), transform=axes.transAxes, fontsize=18,
        verticalalignment='top', bbox=props);

props = dict(boxstyle='round', facecolor='#0097e6', alpha=0.5)
plt.text(rmse2/x_max-0.035, 0.986, "{}".format(rmse2), transform=axes.transAxes, fontsize=18,
        verticalalignment='top', bbox=props);

plt.xlim(0, x_max)
plt.ylim(0, y_max)
plt.xlabel("Angular error (degrees)", fontsize=18)
plt.ylabel("Density", fontsize=18)
plt.xticks(np.arange(0.0, x_max+0.1, 2.0), fontsize=14);
plt.yticks(fontsize=14)
plt.legend(title="Method ($n=100$)", title_fontsize=18, fontsize=18)

ax = plt.gca()
ax.spines[['right', 'top']].set_visible(False)

plt.savefig("Figure3{}.pdf".format(panel), format="pdf", bbox_inches='tight')
plt.savefig("Figure3{}.png".format(panel), format="png", bbox_inches='tight')
../../_images/9f770fea70eb26e50fc678b85c9bf5b024175dad51fbe7054573603c77f23e06.png