1. 条形图


  • 1.1 绘制简单条形图
  • 1.2 绘制簇状条形图
  • 1.3 绘制频数条形图
  • 1.4 条形图着色
  • 1.5 对正负条形图分别着色
  • 1.6 调整条形宽度和条形间距
  • 1.7 绘制堆积条形图
  • 1.8 绘制百分比堆积条形图
  • 1.9 添加数据标签
  • 1.10 绘制 Cleveland 点图

1.1 绘制简单条形图

library(ggplot2)
library(gcookbook)  # example data sets
# Basic bar chart; stat = "identity" uses the y values as the bar heights
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_ggplot2

# 没有 Time == 6
BOD
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8
# Time 是数值型(连续型)变量
str(BOD)
## 'data.frame': 6 obs. of  2 variables:
##  $ Time  : num  1 2 3 4 5 7
##  $ demand: num  8.3 10.3 19 16 15.6 19.8
##  - attr(*, "reference")= chr "A1.4, p. 270"
ggplot(BOD, aes(x = Time, y = demand)) + geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_css_02

# Wrap Time in factor() so it is treated as a discrete (categorical) variable
ggplot(BOD, aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_ggplot2_03

# fill changes the bar fill colour; colour changes the bar outline colour
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    fill = "lightblue",
    colour = "black"
  )

R语言画图框线条粗细 r语言画条形图_css_04

1.2 绘制簇状条形图

library(gcookbook)  # For the data set
cabbage_exp
##   Cultivar Date Weight     sd  n      se
## 1      c39  d16   3.18 0.9566 10 0.30251
## 2      c39  d20   2.80 0.2789 10 0.08819
## 3      c39  d21   2.74 0.9834 10 0.31098
## 4      c52  d16   2.26 0.4452 10 0.14079
## 5      c52  d20   3.11 0.7909 10 0.25009
## 6      c52  d21   1.47 0.2111 10 0.06675
# Map a categorical variable to fill to draw a grouped bar chart.
# position = "dodge" places the bars of each group side by side; without it
# a stacked bar chart is produced instead.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

R语言画图框线条粗细 r语言画条形图_css_05

# colour = "black" 为条形图添加黑色边框线
# scale_fill_brewer()或者scale_fill_manual()函数对图形颜色进行设置
# 这里使用RColorBrewer包的Pastel1调色盘进行调色
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity",position = "dodge", 
    colour = "black") + scale_fill_brewer(palette = "Pastel1")

R语言画图框线条粗细 r语言画条形图_ggplot2_06

## If a combination of the categorical variables is missing, its bar is
## omitted from the plot and the neighbouring bar automatically expands into
## the freed space.
ce <- cabbage_exp[1:5, ]  # copy of the data set without its last row
ce
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
## To adjust this, add an entry for the missing combination and enter NA by
## hand as its y value.

R语言画图框线条粗细 r语言画条形图_svg_07

1.3 绘制频数条形图

## The diamonds data set has 53,940 rows; each row holds the quality
## information for one diamond.
diamonds
## # A tibble: 53,940 x 10
##    carat cut       color clarity depth table
##    <dbl> <ord>     <ord> <ord>   <dbl> <dbl>
##  1  0.23 Ideal     E     SI2      61.5    55
##  2  0.21 Premium   E     SI1      59.8    61
##  3  0.23 Good      E     VS1      56.9    65
##  4  0.29 Premium   I     VS2      62.4    58
##  5  0.31 Good      J     SI2      63.3    58
##  6  0.24 Very Good J     VVS2     62.8    57
##  7  0.24 Very Good I     VVS1     62.3    57
##  8  0.26 Very Good H     SI1      61.9    55
##  9  0.22 Fair      E     VS2      65.1    61
## 10  0.23 Very Good H     VS1      59.4    61
## # ... with 53,930 more rows, and 4 more
## #   variables: price <int>, x <dbl>,
## #   y <dbl>, z <dbl>
## By default geom_bar() uses stat = "count", which counts the number of
## observations in each group. (ggplot2 releases before 2.0.0 named this
## default stat = "bin".)
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

R语言画图框线条粗细 r语言画条形图_数据可视化_08

##  如果x轴对应的是连续型变量,geom_bar() 会在数据中每个唯一的x取值处绘制一个条形,得到一张外观类似直方图的图(真正的直方图应使用 geom_histogram())。
ggplot(diamonds, aes(x = carat)) + geom_bar()

R语言画图框线条粗细 r语言画条形图_ggplot2_09

1.4 条形图着色

library(gcookbook)
## Example: uspopchange describes the population change of each US state
## from 2000 to 2010. Keep the 10 fastest-growing states and colour the bars
## by region.

upc <- subset(uspopchange, rank(Change) > 40)
upc
##             State Abb Region Change
## 3         Arizona  AZ   West   24.6
## 6        Colorado  CO   West   16.9
## 10        Florida  FL  South   17.6
## 11        Georgia  GA  South   18.3
## 13          Idaho  ID   West   21.1
## 29         Nevada  NV   West   35.1
## 34 North Carolina  NC  South   18.5
## 41 South Carolina  SC  South   15.3
## 44          Texas  TX  South   20.6
## 45           Utah  UT   West   23.8
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_svg_10

## 使用scale_fill_manual()对颜色进行重新设定
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) + geom_bar(stat = "identity", 
    colour = "black") + scale_fill_manual(values = c("#669933", "#FFCC66")) + 
    xlab("State")

R语言画图框线条粗细 r语言画条形图_ggplot2_11

1.5 对正负条形图分别着色

library(gcookbook)
# Example: a subset of the climate data
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# pos is TRUE where the 10-year anomaly is non-negative
csub$pos <- csub$Anomaly10y >= 0
head(csub)
##       Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
## 101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
## 102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
## 103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
## 104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
## 105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE
## 106 Berkeley 1905        NA        NA     -0.241  0.107 FALSE
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

R语言画图框线条粗细 r语言画条形图_数据可视化_12

## scale_fill_manual() sets the fill colours. guide = "none" removes the
## legend (guide = FALSE is deprecated since ggplot2 3.3.4). colour and size
## set the outline colour and outline width; the width is in millimetres.
## NOTE(review): ggplot2 >= 3.4.0 prefers linewidth over size for outline
## width -- switch if that is the minimum supported version.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    colour = "black",
    size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

R语言画图框线条粗细 r语言画条形图_svg_13

1.6 调整条形宽度和条形间距

library(gcookbook)  
# geom_bar()函数的参数 width 可以使条形变得更宽或者更窄,该参数的默认值为0.9
ggplot(pg_mean, aes(x = group, y = weight)) + geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_数据可视化_14

ggplot(pg_mean, aes(x = group, y = weight)) + geom_bar(stat = "identity", width = 0.5)

R语言画图框线条粗细 r语言画条形图_ggplot2_15

ggplot(pg_mean, aes(x = group, y = weight)) + geom_bar(stat = "identity", width = 1)

R语言画图框线条粗细 r语言画条形图_数据可视化_16

ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity",width = 0.5, position = "dodge")

R语言画图框线条粗细 r语言画条形图_数据可视化_17

# Increase the spacing between bars within a group: the dodge width (0.7)
# is larger than the bar width (0.5)
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    position = position_dodge(0.7)
  )

R语言画图框线条粗细 r语言画条形图_ggplot2_18

以下4个命令是等价的:

geom_bar(position = "dodge")
geom_bar(width = 0.9, position = position_dodge())
geom_bar(position = position_dodge(0.9))
geom_bar(width = 0.9, position = position_dodge(width = 0.9))

1.7 绘制堆积条形图

library(gcookbook) 
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_数据可视化_19

## 在旧版 ggplot2(2.2.0 之前)中,默认的堆积条形图的条形堆积顺序与图例顺序相反(2.2.0 及之后两者默认一致);可以通过guides()函数反转图例顺序,并指定图例所需要调整的图形属性。
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity") + guides(fill = guide_legend(reverse = TRUE))

R语言画图框线条粗细 r语言画条形图_R语言画图框线条粗细_20

# Reverse the stacking order of the bars.
# The order aesthetic (order = desc(Cultivar)) was deprecated in ggplot2 2.0.0
# and is silently ignored, so position_stack(reverse = TRUE) is used instead.
library(plyr)  # desc() is no longer used here; call retained so the session state is unchanged
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE))

R语言画图框线条粗细 r语言画条形图_css_21

# 使用scale_fill_brewer()调整颜色,设定colour = "black" 为条形添加黑色边框线
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity",     colour = "black") + guides(fill = guide_legend(reverse = TRUE)) + scale_fill_brewer(palette = "Pastel1")

R语言画图框线条粗细 r语言画条形图_数据可视化_22



1.8 绘制百分比堆积条形图

## First use ddply() and transform() from plyr to rescale the data of each
## Date group to percentages of the group total, then draw a stacked bar
## chart of the result.
library(gcookbook)
library(plyr)
ce <- ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

R语言画图框线条粗细 r语言画条形图_数据可视化_23



ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) + geom_bar(stat = "identity",     colour = "black") + guides(fill = guide_legend(reverse = TRUE)) + scale_fill_brewer(palette = "Pastel1")

R语言画图框线条粗细 r语言画条形图_css_24

1.9 添加数据标签

## geom_text()可为条形图添加数据标签
## 设定 vjust 参数可以将标签位置移动至条形图顶端的上方或者下方
library(gcookbook)  
# 在条形图顶端下方
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) + geom_bar(stat = "identity") + geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

R语言画图框线条粗细 r语言画条形图_数据可视化_25



# 在条形图顶端上方
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) + geom_bar(stat = "identity") +     geom_text(aes(label = Weight), vjust = -0.2)

R语言画图框线条粗细 r语言画条形图_svg_26



# 将y轴上限变大
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) + 
  geom_bar(stat = "identity") + 
  geom_text(aes(label = Weight), vjust = -0.2) + 
  ylim(0, max(cabbage_exp$Weight) * 1.05)

R语言画图框线条粗细 r语言画条形图_ggplot2_27



# 设定标签的y轴位置
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) + geom_bar(stat = "identity") +     geom_text(aes(y = Weight + 0.1, label = Weight))

R语言画图框线条粗细 r语言画条形图_R语言画图框线条粗细_28



ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) + geom_bar(stat = "identity", position = "dodge") + geom_text(aes(label = Weight), vjust = 1.5, colour = "white",     position = position_dodge(0.9), size = 3)

R语言画图框线条粗细 r语言画条形图_数据可视化_29



## Add data labels to a stacked bar chart.
## Each label height is the cumulative sum of Weight within its stack, so the
## rows must be sorted first; arrange() from plyr does the sorting.
library(plyr)
# Sort by Date, then by Cultivar in descending order. ggplot2 (>= 2.2.0)
# stacks the first fill level on top, so the running total has to start from
# the last level (the bottom segment) for each label to land on its own bar.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
# Cumulative sum within each Date = top edge of each stacked segment
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ce
##   Cultivar Date Weight     sd  n      se label_y
## 1      c52  d16   2.26 0.4452 10 0.14079    2.26
## 2      c39  d16   3.18 0.9566 10 0.30251    5.44
## 3      c52  d20   3.11 0.7909 10 0.25009    3.11
## 4      c39  d20   2.80 0.2789 10 0.08819    5.91
## 5      c52  d21   1.47 0.2111 10 0.06675    1.47
## 6      c39  d21   2.74 0.9834 10 0.31098    4.21
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")

R语言画图框线条粗细 r语言画条形图_svg_30

# Sort Cultivar in descending order within each Date so the running total
# follows the bottom-to-top stacking order used by ggplot2 (>= 2.2.0).
ce <- arrange(cabbage_exp, Date, desc(Cultivar))

# Label height = top of the segment minus half its height, i.e. its centre
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)

ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

R语言画图框线条粗细 r语言画条形图_css_31

#修饰
#paste()函数在标签后面添加"kg", format()函数 令标签保留两位小数
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) + 
  geom_bar(stat = "identity", colour = "black") + 
  geom_text(aes(y = label_y, label = paste(format(Weight,nsmall = 2), "kg")), size = 4) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_fill_brewer(palette = "Pastel1")

R语言画图框线条粗细 r语言画条形图_数据可视化_32

1.10 绘制 Cleveland 点图

library(gcookbook)
# Take the first 25 rows of the tophitters2001 data set
tophit <- tophitters2001[1:25, ]

ggplot(tophit, aes(x = avg, y = name)) +
  geom_point()

R语言画图框线条粗细 r语言画条形图_svg_33

tophit[, c("name", "lg", "avg")]
##                 name lg    avg
## 1       Larry Walker NL 0.3501
## 2      Ichiro Suzuki AL 0.3497
## 3       Jason Giambi AL 0.3423
## 4     Roberto Alomar AL 0.3357
## 5        Todd Helton NL 0.3356
## 6        Moises Alou NL 0.3314
## 7      Lance Berkman NL 0.3310
## 8         Bret Boone AL 0.3307
## 9  Frank Catalanotto AL 0.3305
## 10     Chipper Jones NL 0.3304
## 11     Albert Pujols NL 0.3288
## 12       Barry Bonds NL 0.3277
## 13        Sammy Sosa NL 0.3276
## 14       Juan Pierre NL 0.3274
## 15     Juan Gonzalez AL 0.3252
## 16     Luis Gonzalez NL 0.3251
## 17      Rich Aurilia NL 0.3239
## 18      Paul Lo Duca NL 0.3196
## 19        Jose Vidro NL 0.3189
## 20    Alex Rodriguez AL 0.3180
## 21       Cliff Floyd NL 0.3171
## 22   Shannon Stewart AL 0.3156
## 23      Jeff Cirillo NL 0.3125
## 24       Jeff Conine AL 0.3111
## 25       Derek Jeter AL 0.3111
# reorder(name, avg)先将 name 转化为因子,然后根据 avg 对其进行排序
ggplot(tophit, aes(x=avg, y=reorder(name, avg))) +
    geom_point(size=3) + 
    theme_bw() +
    theme(panel.grid.major.x = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.major.y = element_line(colour="grey60", linetype="dashed"))

R语言画图框线条粗细 r语言画条形图_ggplot2_34

# x,y轴互换
ggplot(tophit, aes(x=reorder(name, avg), y=avg)) +
    geom_point(size=3) +                       
    theme_bw() +
    theme(axis.text.x = element_text(angle=60, hjust=1),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank(),
          panel.grid.major.x = element_line(colour="grey60", linetype="dashed"))

R语言画图框线条粗细 r语言画条形图_R语言画图框线条粗细_35

# Show the grouping variable.
# Pull out the names, ordered first by lg and then by avg
nameorder <- with(tophit, name[order(lg, avg)])

# Turn name into a factor whose levels follow nameorder
tophit$name <- factor(tophit$name, levels = nameorder)

# geom_segment() replaces the grid lines with segments that end at each
# data point.
# NOTE(review): a numeric legend.position is deprecated in ggplot2 >= 3.5.0
# in favour of legend.position.inside -- confirm the target version.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = c(1, 0.55),  # put the legend inside the plot area
    legend.justification = c(1, 0.5)
  )

R语言画图框线条粗细 r语言画条形图_ggplot2_36

# Show the grouped data in facets, one panel per lg value.
# guide = "none" hides the colour legend (guide = FALSE is deprecated since
# ggplot2 3.3.4).
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

R语言画图框线条粗细 r语言画条形图_css_37

## 要修改分面显示的堆叠顺序 只有通过调整 lg 变量的因子水平来实现

参考书籍

  • R Graphics Cookbook, 2nd edition.