3. 散点图

  • 3.10 向散点图添加边际地毯
  • 3.11 向散点图添加标签
  • 3.12 绘制气泡图
  • 3.13 绘制散点图矩阵
  • 往期文章
  • 参考书籍

3.10 向散点图添加边际地毯

# 使用 geom_rug() 函数添加边际地毯
ggplot(faithful, aes(x=eruptions, y=waiting)) + 
  geom_point() + 
  geom_rug()



R散点图中如何加回归趋势线 r语言散点图添加趋势线_svg


# 通过向边际地毯线的位置坐标添加扰动并设定size减小线宽可以减轻边际地毯线的重叠程度
ggplot(faithful, aes(x=eruptions, y=waiting)) + 
  geom_point() +
  geom_rug(position="jitter", size=.2)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_可视化_02


3.11 向散点图添加标签

library(gcookbook)
# 以countries数据集为例,对各国医疗保健支出与婴儿死亡率之间的关系进行可视化
# 选取人均支出大于2000美元的国家的数据子集进行分析
subset(countries, Year==2009 & healthexp>2000)

sp <- ggplot(subset(countries, Year==2009 & healthexp>2000),
             aes(x=healthexp, y=infmortality)) +
  geom_point()
# annotate()函数指定标签坐标和标签文本
sp + annotate("text", x=4350, y=5.4, label="Canada") +
  annotate("text", x=7400, y=6.8, label="USA")



R散点图中如何加回归趋势线 r语言散点图添加趋势线_可视化_03


# geom_text()函数自动添加数据标签
sp + geom_text(aes(label=Name), size=4)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_数学建模_04


调整标签位置,大家自行尝试。

# 对标签的位置进行调整
sp + geom_text(aes(label=Name), size=4, vjust=0)
sp + geom_text(aes(y=infmortality+.1, label=Name), size=4, vjust=0)

sp + geom_text(aes(label=Name), size=4, hjust=0)
sp + geom_text(aes(x=healthexp+100, label=Name), size=4, hjust=0)

如何只对自己想要的数据点添加标签。

注:有很多人在后台问我如何在火山图里给自己想要的基因添加注释。这里提供了一个思路。

# 新建一个数据
cdat <- subset(countries, Year==2009 & healthexp>2000)
cdat$Name1 <- cdat$Name
# 用%in%运算符找出绘图时希望抱怨的标签
idx <- cdat$Name1 %in% c("Canada", "Ireland", "United Kingdom", "United States",
                         "New Zealand", "Iceland", "Japan", "Luxembourg",
                         "Netherlands", "Switzerland")
idx
# 根据上面的逻辑向量用 NA 重写变量 Name1 中的其它取值
cdat$Name1[!idx] <- NA
cdat
ggplot(cdat, aes(x=healthexp, y=infmortality)) +
  geom_point() +
  geom_text(aes(x=healthexp+100, label=Name1), size=4, hjust=0) +
  xlim(2000, 10000)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_可视化_05


3.12 绘制气泡图

调用 geom_point()scale_size_area() 函数即可绘制气泡图。

# 示例数据
library(gcookbook) # For the data set
cdat <- subset(countries, Year==2009 &
                 Name %in% c("Canada", "Ireland", "United Kingdom", "United States",
                             "New Zealand", "Iceland", "Japan", "Luxembourg",
                             "Netherlands", "Switzerland"))

cdat
> cdat
                Name Code Year       GDP laborrate healthexp infmortality
1733          Canada  CAN 2009  39599.04      67.8  4379.761          5.2
4436         Iceland  ISL 2009  37972.24      77.5  3130.391          1.7
4691         Ireland  IRL 2009  49737.93      63.6  4951.845          3.4
4946           Japan  JPN 2009  39456.44      59.5  3321.466          2.4
5864      Luxembourg  LUX 2009 106252.24      55.5  8182.855          2.2
7088     Netherlands  NLD 2009  48068.35      66.1  5163.740          3.8
7190     New Zealand  NZL 2009  29352.45      68.6  2633.625          4.9
9587     Switzerland  CHE 2009  63524.65      66.9  7140.729          4.1
10454 United Kingdom  GBR 2009  35163.41      62.2  3285.050          4.7
10505  United States  USA 2009  45744.56      65.0  7410.163          6.6
p <- ggplot(cdat, aes(x=healthexp, y=infmortality, size=GDP)) +
  geom_point(shape=21, colour="black", fill="cornsilk")

# 将GDP 映射给半径 (scale_size_continuous)
p



R散点图中如何加回归趋势线 r语言散点图添加趋势线_可视化_06


# 将GDP 映射给面积
p + scale_size_area(max_size=15)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_svg_07


如果x轴,y轴皆是分类变量,气泡图可以用来表示网格上的变量值。

# 对男性组和女性组求和
hec <- HairEyeColor[,,"Male"] + HairEyeColor[,,"Female"]

# 转化为长格式(long format)
library(reshape2)
hec <- melt(hec, value.name="count")

ggplot(hec, aes(x=Eye, y=Hair)) +
  geom_point(aes(size=count), shape=21, colour="black", fill="cornsilk") +
  scale_size_area(max_size=20, guide=FALSE) +
  geom_text(aes(y=as.numeric(Hair)-sqrt(count)/22, label=count), vjust=1,
            colour="grey60", size=4)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_数学建模_08


3.13 绘制散点图矩阵

散点图矩阵是一种对多个变量两两之间关系进行可视化的有效方法。pairs()函数可以绘制散点图矩阵。

注:现在散点图矩阵有现成的R包(如GGally_ggpairs)。以下内容仅供了解。

# 示例数据
library(gcookbook) # For the data set
c2009 <- subset(countries, Year==2009,
                select=c(Name, GDP, laborrate, healthexp, infmortality))

head(c2009
> head(c2009)
              Name      GDP laborrate  healthexp infmortality
50     Afghanistan       NA      59.8   50.88597        103.2
101        Albania 3772.605      59.5  264.60406         17.2
152        Algeria 4022.199      58.5  267.94653         32.0
203 American Samoa       NA        NA         NA           NA
254        Andorra       NA        NA 3089.63589          3.1
305         Angola 4068.576      81.3  203.80787         99.9
pairs(c2009[,2:5])



R散点图中如何加回归趋势线 r语言散点图添加趋势线_数据可视化_09


# 定义一个panel.cor函数来展示变量两两之间的相关系数以代替默认的散点图
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) {
  usr <- par("usr")
  on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y, use="complete.obs"))
  txt <- format(c(r, 0.123456789), digits=digits)[1]
  txt <- paste(prefix, txt, sep="")
  if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
  text(0.5, 0.5, txt, cex =  cex.cor * (1 + r) / 2)
}
# 定义 panel.hist 函数展示各个变量的直方图
panel.hist <- function(x, ...) {
  usr <- par("usr")
  on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col="white", ...)
}
pairs(c2009[,2:5], upper.panel = panel.cor,
      diag.panel  = panel.hist,
      lower.panel = panel.smooth)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_数学建模_10


# 线性模型替代lowess 模型
panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
                      cex = 1, col.smooth = "black", ...) {
  points(x, y, pch = pch, col = col, bg = bg, cex = cex)
  abline(stats::lm(y ~ x),  col = col.smooth, ...)
}


pairs(c2009[,2:5], pch=".",
      upper.panel = panel.cor,
      diag.panel  = panel.hist,
      lower.panel = panel.lm)



R散点图中如何加回归趋势线 r语言散点图添加趋势线_可视化_11