2.折线图
这个系列是关于R中基础图形和进阶图形的绘制。视频课程会陆续更新到我的B站【木舟笔记】,希望大家多多支持!
折线图通常用来对两个连续变量的相互依存关系进行可视化,其中,x轴对应于自变量,y轴对应于因变量。折线图的x轴通常对应的是连续型变量或者有序离散型变量。
- 2.1 绘制简单折线图
- 2.2 向折线图添加数据标记
- 2.3 绘制多重折线图
- 2.4 修改线条样式
- 2.5 修改数据标记样式
- 2.6 绘制面积图
- 2.7 绘制堆积面积图
- 2.8 绘制百分比面积堆积图
- 2.9 添加置信域
- 参考书籍
2.1 绘制简单折线图
library(ggplot2)

# Basic line chart: Time on the x-axis, demand on the y-axis.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line()

# Show the data being plotted.
BOD
## Time demand
## 1 1 8.3
## 2 2 10.3
## 3 3 19.0
## 4 4 16.0
## 5 5 15.6
## 6 7 19.8

# Work on a copy so that BOD itself is left unchanged.
BOD1 <- BOD
# Convert Time to a factor so the x-axis becomes discrete.
BOD1[["Time"]] <- factor(BOD1[["Time"]])
# group = 1 places every point in a single group, so one line connects them
# even though x is now a factor.
ggplot(data = BOD1, mapping = aes(x = Time, y = demand, group = 1)) +
  geom_line()
数据集 BOD 中没有对应于 Time=6 的数据点,因此 Time 被转化为因子型变量时,它并没有 6 这个水平。
可以运行 ylim() 设定y轴范围,或者运行含一个参数的 expand_limits() 扩展y轴的范围。
# The two calls below produce the same result: the y-axis starts at 0.
# Variant 1: set both y limits explicitly.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line() +
  ylim(0, max(BOD$demand))
# Variant 2: only require that y = 0 is included in the axis range.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line() +
  expand_limits(y = 0)
2.2 向折线图添加数据标记
# Overlay a point marker on every observation of the line.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line() +
  geom_point()

library(gcookbook)
# worldpop was not sampled at constant intervals: the more recent the period,
# the more frequent the samples. The markers make this visible.
ggplot(data = worldpop, mapping = aes(x = Year, y = Population)) +
  geom_line() +
  geom_point()
# The same holds when the y-axis uses a log10 scale.
ggplot(data = worldpop, mapping = aes(x = Year, y = Population)) +
  geom_line() +
  geom_point() +
  scale_y_log10()
2.3 绘制多重折线图
# Load plyr so that ddply() can be used to build the example data set.
library(plyr)
# Summarise ToothGrowth: mean tooth length for every supp/dose combination.
tg <- ddply(
  .data = ToothGrowth,
  .variables = c("supp", "dose"),
  .fun = summarise,
  length = mean(len)
)

# Map supp to colour: one coloured line per supplement.
ggplot(data = tg, mapping = aes(x = dose, y = length, colour = supp)) +
  geom_line()
# Map supp to linetype instead.
ggplot(data = tg, mapping = aes(x = dose, y = length, linetype = supp)) +
  geom_line()

# With a factor on the x-axis the grouping has to be given explicitly.
ggplot(
  data = tg,
  mapping = aes(x = factor(dose), y = length, colour = supp, group = supp)
) +
  geom_line()
# Counter-example: without group = supp ggplot() cannot tell which points
# belong together, so it complains instead of drawing the lines.
ggplot(data = tg, mapping = aes(x = factor(dose), y = length, colour = supp)) +
  geom_line()
plot of chunk unnamed-chunk-3
# Incorrect grouping can also yield a jagged, sawtooth-shaped line: here no
# grouping variable is mapped, so all rows of tg are joined into one line.
ggplot(data = tg, mapping = aes(x = dose, y = length)) +
  geom_line()
plot of chunk unnamed-chunk-3
# Distinguish the groups by point shape; size = 4 draws larger points.
ggplot(data = tg, mapping = aes(x = dose, y = length, shape = supp)) +
  geom_line() +
  geom_point(size = 4)
# Distinguish the groups by fill colour; shape 21 is a point with a fill.
ggplot(data = tg, mapping = aes(x = dose, y = length, fill = supp)) +
  geom_line() +
  geom_point(size = 4, shape = 21)

# The markers overlap, so shift both the points and the connecting lines
# sideways by the same amount (0.2).
ggplot(data = tg, mapping = aes(x = dose, y = length, shape = supp)) +
  geom_line(position = position_dodge(width = 0.2)) +
  geom_point(position = position_dodge(width = 0.2), size = 4)
2.4 修改线条样式
通过设置线型(linetype)、线宽(size)和颜色(colour)参数可以分别修改折线的线型、线宽和颜色。
# Constant line properties (dashed, width 1, blue) are passed to geom_line().
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line(colour = "blue", size = 1, linetype = "dashed")
library(plyr)
# Mean tooth length per supp/dose combination.
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length = mean(len))
# One line per supplement, coloured with the ColorBrewer "Set1" palette.
# (The stray closing parenthesis after scale_colour_brewer() was removed; it
# made the snippet a syntax error.)
ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line() +
  scale_colour_brewer(palette = "Set1")
# Properties set outside aes() apply to every line in the plot.
ggplot(data = tg, mapping = aes(x = dose, y = length, group = supp)) +
  geom_line(size = 1.5, colour = "darkgreen")

# supp is mapped to colour, so it is used as the grouping variable
# automatically.
ggplot(data = tg, mapping = aes(x = dose, y = length, colour = supp)) +
  geom_line(linetype = "dashed") +
  geom_point(fill = "white", size = 3, shape = 22)
2.5 修改数据标记样式
# geom_point() controls marker size, shape, outline colour and fill.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line() +
  geom_point(shape = 22, size = 4, fill = "pink", colour = "darkred")

# Shape 21 with a white fill.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_line() +
  geom_point(shape = 21, size = 4, fill = "white")

# Store the dodge once so lines and points are shifted by the same amount.
pd <- position_dodge(width = 0.2)
# Fill is mapped to supp; the two fill colours are chosen manually.
ggplot(data = tg, mapping = aes(x = dose, y = length, fill = supp)) +
  geom_line(position = pd) +
  geom_point(position = pd, size = 3, shape = 21) +
  scale_fill_manual(values = c("black", "white"))
2.6 绘制面积图
运行 geom_area() 函数即可绘制面积图。
# Convert the sunspot.year time series into a data frame for this example.
sunspotyear <- data.frame(
  Year = as.numeric(time(sunspot.year)),
  Sunspots = as.numeric(sunspot.year)
)
# Basic area chart. This call was fused onto the end of the assignment above,
# which made the line a syntax error; it is now a statement of its own.
ggplot(sunspotyear, aes(x = Year, y = Sunspots)) +
  geom_area()
# Set outline colour, fill colour and transparency of the area.
ggplot(data = sunspotyear, mapping = aes(x = Year, y = Sunspots)) +
  geom_area(fill = "blue", colour = "black", alpha = 0.2)

# Drop the line along the bottom: leave colour unset on the area and draw
# the trace with geom_line() instead.
ggplot(data = sunspotyear, mapping = aes(x = Year, y = Sunspots)) +
  geom_area(alpha = 0.2, fill = "blue") +
  geom_line()
2.7 绘制堆积面积图
library(gcookbook)
# Stacked area chart: AgeGroup is mapped to fill, giving one band per group.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area()

# First rows of the data. The console output is kept as ## comments so that
# the snippet remains valid R.
head(uspopage)
## Year AgeGroup Thousands
## 1 1900 <5 9181
## 2 1900 5-14 16966
## 3 1900 15-24 14951
## 4 1900 25-34 12161
## 5 1900 35-44 9273
## 6 1900 45-54 6437
# Set outline colour, outline width and transparency of the areas and use
# the "Blues" palette. breaks = rev(levels(...)) lists the fill scale entries
# in reverse level order.
# NOTE(review): breaks should only reorder the legend, not the stacking of
# the areas -- confirm against the rendered plot.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.4, size = 0.2, colour = "black") +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
# Reverse the stacking order of the areas.
# The `order` aesthetic (aes(order = desc(AgeGroup))) has been deprecated since
# ggplot2 2.0.0 and no longer affects stacking, so the reversal is requested
# from the stacking position instead.
library(plyr) # stays loaded: ddply() is used further down in this file
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(
    position = position_stack(reverse = TRUE),
    colour = "black", size = 0.2, alpha = 0.4
  ) +
  scale_fill_brewer(palette = "Blues")

# Remove the outline around each area: draw the areas without a border
# (colour = NA) and add only the stacked lines on top. Both layers use the
# same reversed stacking so that lines and areas stay aligned.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(
    position = position_stack(reverse = TRUE),
    colour = NA, alpha = 0.4
  ) +
  scale_fill_brewer(palette = "Blues") +
  geom_line(position = position_stack(reverse = TRUE), size = 0.2)
2.8 绘制百分比面积堆积图
# First compute, within every Year, each row's share of that year's total
# as a percentage.
uspopage_prop <- ddply(
  .data = uspopage,
  .variables = "Year",
  .fun = transform,
  Percent = Thousands / sum(Thousands) * 100
)
# Stacked area chart of the percentages; the fill scale entries are listed
# in reverse level order via breaks.
ggplot(
  data = uspopage_prop,
  mapping = aes(x = Year, y = Percent, fill = AgeGroup)
) +
  geom_area(alpha = 0.4, size = 0.2, colour = "black") +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
# First rows of the source data. The console output is kept as ## comments so
# that the snippet remains valid R.
head(uspopage)
## Year AgeGroup Thousands
## 1 1900 <5 9181
## 2 1900 5-14 16966
## 3 1900 15-24 14951
## 4 1900 25-34 12161
## 5 1900 35-44 9273
## 6 1900 45-54 6437
# Within every Year, add each row's percentage share of that year's total.
uspopage_prop <- ddply(uspopage, "Year", transform, Percent = Thousands/sum(Thousands) * 100)
2.9 添加置信域
运行 geom_ribbon(),分别映射一个变量给 ymin 和 ymax。
climate 数据集中的 Anomaly10y 变量表示了各年温度相对于1950-1980平均水平变异的10年移动平均。Unc10y 表示其95%置信区间。
library(gcookbook)
# Take a subset of the climate data: rows whose Source is "Berkeley" and only
# the three columns needed below.
clim <- subset(climate, Source == "Berkeley", select = c("Year", "Anomaly10y", "Unc10y"))
# First rows of the subset. The console output is kept as ## comments so that
# the snippet remains valid R.
head(clim)
## Year Anomaly10y Unc10y
## 1 1800 -0.435 0.505
## 2 1801 -0.453 0.493
## 3 1802 -0.460 0.486
## 4 1803 -0.493 0.489
## 5 1804 -0.536 0.483
## 6 1805 -0.541 0.475
# Draw the confidence region as a shaded band.
# Mind the layer order: the ribbon is added first so the line is drawn on
# top of it.
ggplot(data = clim, mapping = aes(x = Year, y = Anomaly10y)) +
  geom_ribbon(
    mapping = aes(ymin = Anomaly10y - Unc10y, ymax = Anomaly10y + Unc10y),
    alpha = 0.2
  ) +
  geom_line()

# Alternative: mark the lower and upper bounds of the region with dotted
# grey lines.
ggplot(data = clim, mapping = aes(x = Year, y = Anomaly10y)) +
  geom_line(
    mapping = aes(y = Anomaly10y - Unc10y),
    linetype = "dotted", colour = "grey50"
  ) +
  geom_line(
    mapping = aes(y = Anomaly10y + Unc10y),
    linetype = "dotted", colour = "grey50"
  ) +
  geom_line()
参考书籍
- R Graphics Cookbook, 2nd edition.