Akritas’ remarks are generally very insightfull. Please read remark 6.2-1. Let’s do some examples below. Copy the code snippets below. Or you can download this webpage as an Rmd file.
In his remark, he says the model based estimators are better if the correct model is known. However, model based estimates can be misleading (read horribly wrong) if the model assumption is incorrect.
library(reshape)
library(ggplot2)
uniform_runs = function(sample=30){
unif_data = runif(sample)
model_mean = (max(unif_data)-min(unif_data))/2
free_mean = mean(unif_data)
c("model"=model_mean,"free"=free_mean)
}
wide_estimates = replicate(1700,uniform_runs(sample=115))
apply(wide_estimates,1,mean)
apply(wide_estimates,1,sd)
long_estimates = melt(wide_estimates)
qplot(data=long_estimates,x=value)+facet_wrap(~X1,ncol=1,scales="free_y")
library(reshape)
library(ggplot2)
uniform_runs = function(sample=30){
unif_data = runif(sample)
model_mean = (max(unif_data)-min(unif_data))/2
free_mean = mean(unif_data)
c("model"=model_mean,"free"=free_mean)
}
wide_estimates = replicate(1700,uniform_runs(sample=115))
apply(wide_estimates,1,mean)
## model free
## 0.4915459 0.5006877
apply(wide_estimates,1,sd)
## model free
## 0.005867268 0.026887230
long_estimates = melt(wide_estimates)
qplot(data=long_estimates,x=value)+facet_wrap(~X1,ncol=1,scales="free_y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Now what happens if we use a uniform model to estimate the mean when in fact it is not uniform (it is normal).
library(reshape)
library(ggplot2)
normal_runs = function(sample=15){
unif_data = rnorm(sample)
model_mean = (max(unif_data)-min(unif_data))/2
free_mean = mean(unif_data)
c("model"=model_mean,"free"=free_mean)
}
wide_estimates_bad = replicate(1700,normal_runs(sample=115))
apply(wide_estimates_bad,1,mean)
apply(wide_estimates_bad,1,sd)
long_estimates_bad = melt(wide_estimates_bad)
qplot(data=long_estimates_bad,x=value)+facet_wrap(~X1,ncol=1,scales="free_y")
library(reshape)
library(ggplot2)
normal_runs = function(sample=15){
unif_data = rnorm(sample)
model_mean = (max(unif_data)-min(unif_data))/2
free_mean = mean(unif_data)
c("model"=model_mean,"free"=free_mean)
}
wide_estimates_bad = replicate(1700,normal_runs(sample=115))
apply(wide_estimates_bad,1,mean)
## model free
## 2.556818354 0.001176282
apply(wide_estimates_bad,1,sd)
## model free
## 0.29206107 0.09146539
long_estimates_bad = melt(wide_estimates_bad)
qplot(data=long_estimates_bad,x=value)+facet_wrap(~X1,ncol=1,scales="free_y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Here is a nice cheat sheet for the Weibull Distribution. We are also going to use the MASS
library to help us fit a model to our data. Don’t worry about understanding the fitting procedure (6.3.2 Describes the method we are using with the fitdsitr
function).
We are going to generate random data from a known distribution Weibull
and then see how well our estimates represents the truth.
library(MASS)
n=10
percentile_desired = .01
shape_w = 30
scale_w = 60
X5 = rweibull(n,shape_w,scale_w)
fit_w = fitdistr(X5,"weibull")
fit_n = fitdistr(X5,"normal")
fit_n
mean(X5); sd(X5)
hist(X5)
qqnorm(X5)
qqline(X5)
# The upper percentile
# Model Free
quantile(X5,1-percentile_desired)
# Model Based: Not correct Model
qnorm(1-percentile_desired,mean=fit_n$estimate["mean"],sd=fit_n$estimate["sd"])
# Model Based: Correct Distribution, Estimated Parameters
qweibull(1-percentile_desired,shape=fit_w$estimate["shape"],scale=fit_w$estimate["scale"])
# Model Based: Correct Distribution, Correct Parameters
qweibull(1-percentile_desired,shape_w,scale_w)
# Now look at the lower percentile
# Model Free
quantile(X5,percentile_desired)
# Model Based: Not correct Model
qnorm(percentile_desired,mean=fit_n$estimate["mean"],sd=fit_n$estimate["sd"])
# Model Based: Correct Distribution, Estimated Parameters
qweibull(percentile_desired,shape=fit_w$estimate["shape"],scale=fit_w$estimate["scale"])
# Model Based: Correct Distribution, Correct Parameters
qweibull(percentile_desired,shape_w,scale_w)
library(MASS)
n=10
percentile_desired = .01
shape_w = 30
scale_w = 60
X5 = rweibull(n,shape_w,scale_w)
fit_w = fitdistr(X5,"weibull")
fit_n = fitdistr(X5,"normal")
fit_n
## mean sd
## 56.6151645 3.4152786
## ( 1.0800059) ( 0.7636795)
mean(X5); sd(X5)
## [1] 56.61516
## [1] 3.60002
hist(X5)
qqnorm(X5)
qqline(X5)
# The upper percentile
# Model Free
quantile(X5,1-percentile_desired)
## 99%
## 59.94553
# Model Based: Not correct Model
qnorm(1-percentile_desired,mean=fit_n$estimate["mean"],sd=fit_n$estimate["sd"])
## [1] 64.56029
# Model Based: Correct Distribution, Estimated Parameters
qweibull(1-percentile_desired,shape=fit_w$estimate["shape"],scale=fit_w$estimate["scale"])
## [1] 62.08192
# Model Based: Correct Distribution, Correct Parameters
qweibull(1-percentile_desired,shape_w,scale_w)
## [1] 63.13344
# Now look at the lower percentile
# Model Free
quantile(X5,percentile_desired)
## 1%
## 49.435
# Model Based: Not correct Model
qnorm(percentile_desired,mean=fit_n$estimate["mean"],sd=fit_n$estimate["sd"])
## [1] 48.67004
# Model Based: Correct Distribution, Estimated Parameters
qweibull(percentile_desired,shape=fit_w$estimate["shape"],scale=fit_w$estimate["scale"])
## [1] 47.55906
# Model Based: Correct Distribution, Correct Parameters
qweibull(percentile_desired,shape_w,scale_w)
## [1] 51.47037
n= 20
X1= rbinom(n,100,.5)
X2 = rnorm(n,mean=50,sd=5)
X3 = runif(n,41.4,58.6)
X4 = rweibull(n,10,60)
qnorm(.95,mean(X1),sd(X1))
qnorm(.95,mean(X2),sd(X2))
qnorm(.95,mean(X3),sd(X3))
qnorm(.95,mean(X4),sd(X4))
quantile(X1,.95)
quantile(X2,.95)
quantile(X3,.95)
quantile(X4,.95)
qbinom(.95,100,.5)
qnorm(.95,50,5)
qunif(.95,41.4,58.6)
qweibull(.95,10,60)