Figure 3: Histogram comparing sampling strategies

Contents

Figure 3: Histogram comparing sampling strategies#

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pmagpy.pmag as pmag
import pmagpy.ipmag as ipmag

import smpsite as smp

import warnings 
warnings.filterwarnings('ignore')

%matplotlib inline

Run simulation#

This notebook also includes how to run the simulation for the histograms.

%%time

angular_dispersio_within_site = 10 # degrees
kappa_within_site = smp.angular2kappa(angular_dispersio_within_site)
latitude = 30
outlier_rate = 0.10
n_iters = 5000

params1 = smp.Params(N=100,
                     n0=1,
                     kappa_within_site=kappa_within_site,
                     site_lat=latitude, 
                     site_long=0,
                     outlier_rate=outlier_rate,
                     secular_method="G",
                     kappa_secular=None)

params2 = smp.Params(N=20,
                     n0=5,
                     kappa_within_site=kappa_within_site,
                     site_lat=latitude, 
                     site_long=0,
                     outlier_rate=outlier_rate,
                     secular_method="G",
                     kappa_secular=None)

df_false    = smp.simulate_estimations(params1, n_iters=n_iters, ignore_outliers="False")
df_true     = smp.simulate_estimations(params2, n_iters=n_iters, ignore_outliers="True")
df_vandamme = smp.simulate_estimations(params1, n_iters=n_iters, ignore_outliers="vandamme")

df_false.to_csv("../../outputs/fig3a_df_false.csv")
df_true.to_csv("../../outputs/fig3a_df_true.csv") 
df_vandamme.to_csv("../../outputs/fig3a_df_vandamme.csv") 

CPU times: user 33min 41s, sys: 123 ms, total: 33min 41s
Wall time: 33min 41s

Figure#

# panel = 'a'
# panel = 'b'
panel = 'c'

df_false = pd.read_csv("../../outputs/fig3"+panel+"_df_false.csv")
df_true = pd.read_csv("../../outputs/fig3"+panel+"_df_true.csv")
df_vandamme = pd.read_csv("../../outputs/fig3"+panel+"_df_vandamme.csv")

df_true

	Unnamed: 0	plong	plat	total_samples	samples_per_sites	S2_vgp	error_angle	S2_vgp_real	n_tot	N	n0	kappa_within_site	site_lat	site_long	outlier_rate	secular_method	kappa_secular	ignore_outliers
0	0	190.306018	84.314629	36.0	5	218.096967	5.685371	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
1	1	55.662167	85.483652	34.0	5	215.659161	4.516348	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
2	2	193.984859	86.462183	34.0	5	265.162109	3.537817	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
3	3	358.720274	86.835100	34.0	5	461.786418	3.164900	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
4	4	224.066484	87.495521	28.0	5	206.023724	2.504479	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4995	4995	165.292979	84.319601	39.0	5	121.964481	5.680399	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
4996	4996	9.619776	87.047602	46.0	5	175.154300	2.952398	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
4997	4997	338.354021	85.822482	40.0	5	163.134369	4.177518	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
4998	4998	14.220743	87.025506	48.0	5	163.107882	2.974494	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True
4999	4999	137.642242	86.984741	41.0	5	301.022274	3.015259	191.7229	100	20	5	66.069981	30	0	0.6	G	NaN	True

5000 rows × 18 columns

%matplotlib inline

if panel == 'a':
    x_max = 8
    y_max = 0.52
    bw = 0.5
elif panel == 'b':
    x_max = 8
    y_max = 0.4
    bw = 0.5
elif panel == 'c':
    x_max = 14
    y_max = 0.25
    bw = 1.0

fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,8))

# Histograms
sns.histplot(df_vandamme.error_angle, ax=axes, color='#e84118', stat='density', binwidth=bw, binrange=(0,20), alpha=.5, label="Strategy 1 \n $n_0=1$, $N=100$")
sns.histplot(df_true.error_angle, ax=axes, color='#0097e6', stat='density', binwidth=bw, binrange=(0,20), alpha=.5, label="Strategy 2 \n $n_0=5$, $N=20$")

# Density plot
sns.kdeplot(df_false.error_angle, ax=axes, color='grey', alpha=.9, lw=5, label="No outlier detection \n $n_0=1$, $N=100$")


rmse1 = np.round(np.mean(df_vandamme.error_angle**2)**.5, decimals=2)
rmse2 = np.round(np.mean(df_true.error_angle**2)**.5, decimals=2)

plt.axvline(x=rmse1, ymax=0.93, c='#e84118', lw=5)
plt.axvline(x=rmse2, ymax=0.93, c='#0097e6', lw=5)

props = dict(boxstyle='round', facecolor="#e84118", alpha=0.5)
plt.text(rmse1/x_max-0.035, 0.986, "{}".format(rmse1), transform=axes.transAxes, fontsize=18,
        verticalalignment='top', bbox=props);

props = dict(boxstyle='round', facecolor='#0097e6', alpha=0.5)
plt.text(rmse2/x_max-0.035, 0.986, "{}".format(rmse2), transform=axes.transAxes, fontsize=18,
        verticalalignment='top', bbox=props);

plt.xlim(0, x_max)
plt.ylim(0, y_max)
plt.xlabel("Angular error (degrees)", fontsize=18)
plt.ylabel("Density", fontsize=18)
plt.xticks(np.arange(0.0, x_max+0.1, 2.0), fontsize=14);
plt.yticks(fontsize=14)
plt.legend(title="Method ($n=100$)", title_fontsize=18, fontsize=18)

ax = plt.gca()
ax.spines[['right', 'top']].set_visible(False)

plt.savefig("Figure3{}.pdf".format(panel), format="pdf", bbox_inches='tight')
plt.savefig("Figure3{}.png".format(panel), format="png", bbox_inches='tight')

../../_images/9f770fea70eb26e50fc678b85c9bf5b024175dad51fbe7054573603c77f23e06.png