展示各类回归模型的回归线绘制方法,包括通用绘制方法以及ggplot2提供的一些回归线简单绘制方法:
- 线性回归
- 多项式回归
- loess(局部加权)回归
- 分段线性回归
- 样条回归
- 稳健回归
- 分位数回归
library(ggplot2)
library(MASS)
library(splines)
示例数据
使用R自带的mtcars汽车数据集,研究mpg与wt这两个连续变量的关系
print(head(mtcars))
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
# Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
# Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
str(mtcars)
# 'data.frame': 32 obs. of 11 variables:
# $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
# $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
# $ disp: num 160 160 108 258 360 ...
# $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
# $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
# $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
# $ qsec: num 16.5 17 18.6 19.4 17 ...
# $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
# $ am : num 1 1 1 0 0 0 0 0 0 0 ...
# $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
# $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
绘制mpg与wt的散点图
p <- ggplot() +
geom_point(data = mtcars, mapping = aes(x = mpg, y = wt)) +
theme_bw()
p
绘制各类回归模型的回归线的通用方式
- 拟合回归模型
- 利用模型求出给定x,y的拟合值,以及拟合值的置信区间
- 利用geom_line绘制回归线,利用geom_ribbon绘制置信区间阴影
线性回归
# 拟合简单线性回归模型
lm <- lm(wt ~ mpg, data = mtcars)
# 利用模型求出给定x,y的拟合值,以及拟合值的置信区间
new_mpg <- seq(min(mtcars$mpg), max(mtcars$mpg), 0.01)
pred_wt <- data.frame(predict(lm, newdata = data.frame(mpg = new_mpg),
interval = "confidence"),
new_mpg = new_mpg)
print(head(pred_wt))
# fit lwr upr new_mpg
# 1 4.582291 4.240349 4.924232 10.40
# 2 4.580882 4.239197 4.922566 10.41
# 3 4.579473 4.238045 4.920901 10.42
# 4 4.578065 4.236893 4.919236 10.43
# 5 4.576656 4.235741 4.917571 10.44
# 6 4.575247 4.234589 4.915906 10.45
# 利用geom_line绘制回归线,利用geom_ribbon绘制置信区间阴影
p +
geom_line(data = pred_wt, mapping = aes(x = new_mpg, y = fit),
color = "red", size = 1, alpha = 0.5) +
geom_ribbon(data = pred_wt, mapping = aes(x = new_mpg,
ymin = lwr, ymax = upr),
fill = "grey", alpha = 0.5)
绘制线性回归线的另一种方式,根据斜率和截距使用geom_abline
绘图
p +
geom_abline(slope = lm$coefficients[2], intercept = lm$coefficients[1],
color = "red", size = 1, alpha = 0.5) +
geom_ribbon(data = pred_wt, mapping = aes(x = new_mpg,
ymin = lwr, ymax = upr),
fill = "grey", alpha = 0.5)
多项式回归
# 拟合二次多项式回归模型
poly <- lm(wt ~ mpg + I(mpg ^ 2), data = mtcars)
new_mpg <- seq(min(mtcars$mpg), max(mtcars$mpg), 0.01)
pred_wt <- data.frame(predict(poly, newdata = data.frame(mpg = new_mpg),
interval = "confidence"),
new_mpg = new_mpg)
print(head(pred_wt))
# fit lwr upr new_mpg
# 1 5.052883 4.560239 5.545526 10.40
# 2 5.050333 4.558547 5.542119 10.41
# 3 5.047784 4.556854 5.538714 10.42
# 4 5.045236 4.555162 5.535310 10.43
# 5 5.042689 4.553469 5.531909 10.44
# 6 5.040143 4.551776 5.528509 10.45
p +
geom_line(data = pred_wt, mapping = aes(x = new_mpg, y = fit),
color = "red", size = 1, alpha = 0.5) +
geom_ribbon(data = pred_wt, mapping = aes(x = new_mpg,
ymin = lwr, ymax = upr),
fill = "grey", alpha = 0.5)
ggplot2提供了一些回归线的简单绘制方式
线性回归,geom_smooth指定method = “lm”
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "lm")
loess(局部加权)回归,geom_smooth指定method = “loess”
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "loess")
gam(广义可加模型),geom_smooth指定method = “gam”,同时指定formula的具体形式
广义可加模型拓展了广义线性模型,允许自变量采取各类形式的变换,以2个自变量为例:
- 广义线性模型可表示为:
- 广义可加模型可表示为:
可以是各类变换,如多项式变换;甚至是非参数变换,如(loess为局部加权多项式)
使用gam绘制线性回归
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "gam", formula = y ~ x)
使用gam绘制多项式回归
# 二次多项式回归
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "gam", formula = y ~ x + I(x ^ 2))
使用gam绘制分段线性回归
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "gam", formula = y ~ x + I((x - 20) * (x > 20))) +
geom_vline(xintercept = 20, linetype = 2, color = "red")
使用gam绘制样条回归
# 样条设置1个节点,立方样条
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "gam", formula = y ~ bs(x, knots = 1, degree = 3))
其他的一些回归线绘制
稳健回归
- 可用于解决回归中的离群值问题
- 绘图时,内部调用
MASS::rlm()
拟合稳健回归
p +
geom_smooth(data = mtcars, mapping = aes(x = mpg, y = wt),
method = "rlm")
分位数回归
- 可用于解决误差项非正态问题
- 分别绘制0.1、0.5、0.9的的回归线
- 绘图时,内部调用
quantreg::rq()
拟合分位数回归
p +
geom_quantile(data = mtcars, mapping = aes(x = mpg, y = wt,
color = "0.1 quantile"),
quantiles = 0.1) +
geom_quantile(data = mtcars, mapping = aes(x = mpg, y = wt,
color = "0.5 quantile"),
quantiles = 0.5) +
geom_quantile(data = mtcars, mapping = aes(x = mpg, y = wt,
color = "0.9 quantile"),
quantiles = 0.9) +
labs(color = "Quantile")