# pip install RISE


y = c(0, 0, 1, 0,  0, 3, 3, 5,  1, 0, 0, 0,  8, 1, 0, 7)
Trat = gl(4, 4, labels = LETTERS[1:4])  
Trat


options(repr.plot.width = 7, repr.plot.height = 3.5, repr.plot.res = 200)


fo = table(y)/length(y)                   # amostral
fm = dpois(y, lambda = mean(y))           # pelo modelo
plot(fo, las = 1)           
points(x = y, y = fm, col = "blue", cex = 2)


boxplot(y ~ Trat)


mod.normal = lm(y ~ Trat)
anova(mod.normal)


mod.poisson = glm(y ~ Trat, family = poisson) 
summary(mod.poisson)

Call:
glm(formula = y ~ Trat, family = poisson)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.8284  -0.7071  -0.7071   1.1281   1.7579  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)   
(Intercept) -1.386e+00  9.999e-01  -1.386  0.16563   
TratB        2.398e+00  1.044e+00   2.296  0.02168 * 
TratC        4.569e-16  1.414e+00   0.000  1.00000   
TratD        2.773e+00  1.031e+00   2.690  0.00715 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 55.299  on 15  degrees of freedom
Residual deviance: 28.720  on 12  degrees of freedom
AIC: 59.93

Number of Fisher Scoring iterations: 5


anova(mod.poisson, test = "Chisq")


AIC(mod.normal, mod.poisson)


se = function(x) se = sqrt(var(x)/length(x))
s = aggregate(y ~ Trat, FUN = se)                           # amostral
library(emmeans)
med.n = lsmeans(mod.normal, "Trat", type = "response")      # pelo modelo
med.p = lsmeans(mod.poisson, "Trat", type = "response")     # pelo modelo
cat("\nSE(obs.):", round(s$y, 2))
med.n
med.p

SE(obs.): 0.25 1.03 0.25 2.04

 Trat lsmean   SE df lower.CL upper.CL
 A      0.25 1.16 12   -2.271     2.77
 B      2.75 1.16 12    0.229     5.27
 C      0.25 1.16 12   -2.271     2.77
 D      4.00 1.16 12    1.479     6.52

Confidence level used: 0.95

 Trat rate    SE  df asymp.LCL asymp.UCL
 A    0.25 0.250 Inf    0.0352      1.77
 B    2.75 0.829 Inf    1.5230      4.97
 C    0.25 0.250 Inf    0.0352      1.77
 D    4.00 1.000 Inf    2.4505      6.53

Confidence level used: 0.95 
Intervals are back-transformed from the log scale


rP = residuals(mod.poisson, type = "pearson")                # res. de Pearson
X2 = sum(rP^2)                                               # qui-quad. de Pearson
X2/mod.poisson$df.residual                                   # par. dispersão
pchisq(X2, df = mod.poisson$df.residual, lower.tail = FALSE) # p-valor


library(pscl)
mod.zip = zeroinfl(y ~ Trat | 1, dist = "poisson")
summary(mod.zip)

Call:
zeroinfl(formula = y ~ Trat | 1, dist = "poisson")

Pearson residuals:
    Min      1Q  Median      3Q     Max 
-1.3673 -0.4795 -0.4795  0.9738  1.4661 

Count model coefficients (poisson with log link):
              Estimate Std. Error z value Pr(>|z|)   
(Intercept) -1.145e+00  1.048e+00  -1.093   0.2745   
TratB        2.415e+00  1.091e+00   2.214   0.0268 * 
TratC       -2.978e-05  1.460e+00   0.000   1.0000   
TratD        2.813e+00  1.077e+00   2.612   0.0090 **

Zero-inflation model coefficients (binomial with logit link):
            Estimate Std. Error z value Pr(>|z|)
(Intercept)  -1.2329     0.8503   -1.45    0.147
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Number of iterations in BFGS optimization: 12 
Log-likelihood: -22.69 on 5 Df


exp(-1.233)/(1 + exp(-1.233))


rP = residuals(mod.zip, type = "pearson")                  # res. de Pearson
X2 = sum(rP^2)                                             # qui-quad. de Pearson
X2/mod.zip$df.residual
pchisq(X2, df = mod.zip$df.residual, lower.tail = FALSE)   # p-valor


AIC(mod.normal, mod.poisson, mod.zip)


cat("\nSE(obs.):", round(s$y, 3))                     # erros-padrão amostrais
med.z = lsmeans(mod.zip, "Trat", type = "response")
med.z

SE(obs.): 0.25 1.031 0.25 2.041

 Trat lsmean    SE  df asymp.LCL asymp.UCL
 A     0.246 0.255 Inf    -0.252     0.745
 B     2.758 0.988 Inf     0.822     4.693
 C     0.246 0.255 Inf    -0.252     0.745
 D     4.107 1.295 Inf     1.569     6.644

Confidence level used: 0.95


options(repr.plot.width = 9, repr.plot.height = 3, repr.plot.res = 200)


library(ggplot2); library(gridExtra)
p1 = plot(med.z, horizontal = FALSE) + ylim(-3, 7)
p2 = plot(med.p, horizontal = FALSE) + ylim(-3, 7)
p3 = plot(med.n, horizontal = FALSE) + ylim(-3, 7)
grid.arrange(p1, p2, p3, ncol = 3)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)
	<int>	<dbl>	<dbl>	<dbl>	<dbl>
Trat	3	42.1875	14.062500	2.626459	0.09829004
Residuals	12	64.2500	5.354167	NA	NA

	Df	Deviance	Resid. Df	Resid. Dev	Pr(>Chi)
	<int>	<dbl>	<int>	<dbl>	<dbl>
NULL	NA	NA	15	55.29852	NA
Trat	3	26.57845	12	28.72007	7.215593e-06

Modelos lineares generalizados para dados de contagem e dados inflacionados de zero¶

Superdispersão¶

Exemplo de aplicação¶

Dados experimentais¶

Frequência relativa observada vs. freq. pelo modelo Poisson¶

LM versus GLM¶

ANOVA¶

GLM Poisson¶

ANODEV¶

Comparando os modelos por AIC (menor é melhor)¶

Mas o GLM Poisson conseguiu capturar de forma eficaz a variabilidade contida nos dados?¶

Um teste de superdispersão¶

GLM 'ZIP' (Zero-Inflated Poisson)¶

Capturando superdispersão com ZIP:¶

Probabilidade de excesso de zeros:¶

Ainda há superdispersão?¶

Comparando os três modelos¶

Intervalos de 95% de confiança¶

Alternativas para lidar com superdispersão/inflação de zeros¶

Obrigado!¶

	df	AIC
	<dbl>	<dbl>
mod.normal	5	77.64912
mod.poisson	4	59.93009
mod.zip	5	55.38440