library(ggplot2)

# Counts of spam vs. normal mail, one row per category.
x <- c("垃圾邮件", "正常邮件")
y <- c(357127, 313150)
d1 <- data.frame(x, y)

# Notes on the pieces used in the plot below:
# - ggplot() creates the plot; layers are added with `+`.
# - aes() maps variables to the x and y axes (x is usually categorical);
#   add fill = <category> to colour the bars by group.
# - geom_bar() adds a bar chart layer.
# - `fill` is the bar fill colour, `colour` the outline colour.
# - `width` is the bar width argument (default 0.9 per the original note).
# - position = position_dodge(0.6) spaces grouped bars; larger values move
#   them further apart.
# - geom_text() adds data labels:
#     geom_text(aes(label = Weight), vjust = -0.2)                   # above
#     geom_text(aes(label = Weight), vjust = 1.5, colour = "white")  # inside
# - ggtitle("对比图") adds a title.
# - xlab("邮件类别") + ylab("邮件数量") + labs(fill = "类型") label the axes
#   and the legend.
# - If Chinese text renders as garbage, add
#   theme(text = element_text(family = 'STXihei')); STXihei is the font the
#   original author used on macOS.
ggplot(d1, aes(x = x, y = y, fill = x)) +
  xlab("邮件类别") +
  ylab("邮件数量") +
  geom_bar(stat = "identity", width = 0.5) +
  geom_text(aes(label = y), vjust = -0.2) +
  labs(fill = "类别") +
  theme(text = element_text(family = 'STXihei'))
# Scatter plot
# Equivalent to: qplot(sleep_rem / sleep_total, awake, data = msleep)
ggplot(msleep, aes(sleep_rem / sleep_total, awake)) +
  geom_point()

# Scatter plot with a smoother. Equivalent to:
# qplot(sleep_rem / sleep_total, awake, data = msleep,
#       geom = c("point", "smooth"))
ggplot(msleep, aes(sleep_rem / sleep_total, awake)) +
  geom_point() +
  geom_smooth()

# Modifying an existing plot (the dataset or the mappings can be swapped)
p <- ggplot(mtcars, aes(mpg, wt, colour = cyl)) +
  geom_point()
p

# Work on a modified copy of the dataset in which mpg is squared.
mtcars <- transform(mtcars, mpg = mpg^2)

# %+% replaces the data of the existing plot with the modified mtcars.
p %+% mtcars

# Start over without a colour mapping and use one fixed point colour instead.
p <- ggplot(mtcars, aes(mpg, wt))
p + geom_point(colour = "darkblue")

# Grouping, using the Oxboys dataset
# NOTE(review): Oxboys is not loaded anywhere in the visible part of this
# file; it presumably comes from the nlme package — confirm.

# Several groups, one aesthetic: one line per Subject.
P <- ggplot(Oxboys, aes(age, height, group = Subject)) +
  geom_line()
P

# group = 1 treats all observations as one group, so a single line is drawn.
p <- ggplot(Oxboys, aes(age, height, group = 1)) +
  geom_line()
p

# Add a single overall linear fit, without the confidence band.
p + geom_smooth(aes(group = 1), method = "lm", size = 2, se = FALSE)

# Box plot: overriding the default grouping
boysbox <- ggplot(Oxboys, aes(Occasion, height)) +
  geom_boxplot()
boysbox

# Overlay one line per Subject by setting the group for this layer only.
boysbox + geom_line(aes(group = Subject), colour = "#3366FF")

# Histogram with density (instead of count) on the y axis
ggplot(diamonds, aes(carat)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.1)

# Position adjustments
p <- ggplot(diamonds, aes(clarity, fill = cut))
# stack: bars of each group are placed on top of each other.
p + geom_bar(position = 'stack') +
  scale_y_continuous(breaks = seq(0, 12000, 2000))

p <- ggplot(diamonds, aes(clarity, fill = factor(cut)))
# fill: like stack, but the total height is normalised to 1.
p + geom_bar(position = 'fill')

p <- ggplot(diamonds, aes(clarity, fill = factor(cut)))
# dodge: bars of each group are placed side by side.
p + geom_bar(position = 'dodge')

# Using a scale to change the colour values
# NOTE(review): at this point `p` is the diamonds clarity bar plot, which has
# no `year` variable and no y mapping for points; this snippet presumably
# expects a different `p` — confirm before running.
p + geom_point(aes(colour = factor(year))) +
  stat_smooth() +
  scale_color_manual(values = c('blue', 'red'))

# Pie chart: one stacked bar, then polar coordinates with the angle taken from y
p <- ggplot(mpg, aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1)
p + coord_polar(theta = "y")

# Ways to look at dense scatter plots:
#   - add jitter
#   - add transparency (alpha)
#   - 2D histogram (stat_bin2d)
#   - 2D density plot (stat_density2d)
# NOTE(review): `p` here is still the pie-chart plot built from mpg; the axis
# limits below suggest a different scatter plot was intended — confirm.
p + stat_density2d(aes(fill = ..level..), geom = "polygon") +
  coord_cartesian(xlim = c(0, 1.5), ylim = c(0, 6000)) +
  scale_fill_continuous(high = 'red2', low = 'blue4')

# Wind rose (wind direction and speed)

# Draw 100 random wind directions and bin them into 16 intervals.
dir <- cut_interval(runif(100, 0, 360), n = 16)

# Draw 100 random wind speeds and bin them into 4 strength classes.
mag <- cut_interval(rgamma(100, 15), 4)

sample <- data.frame(dir = dir, mag = mag)

# Map direction to x, count to y and speed class to the fill colour; build the
# bar chart first, then convert it to polar coordinates.
p <- ggplot(sample, aes(x = dir, y = ..count.., fill = mag))
p + geom_bar() + coord_polar()

# Combining geoms and stats
d <- ggplot(diamonds, aes(carat)) + xlim(0, 3)

# Binned counts drawn as an area.
d + stat_bin(aes(ymax = ..count..), binwidth = 0.1, geom = "area")

# Binned density mapped to point size.
d + stat_bin(
  aes(size = ..density..),
  binwidth = 0.1,
  geom = "point",
  position = "identity"
)

# Binned counts mapped to the fill colour of tiles.
d + stat_bin(
  aes(y = 1, fill = ..count..),
  binwidth = 0.1,
  geom = "tile",
  position = "identity"
)

# NOTE(review): same arguments as above but passed to geom_bar(), which is
# itself a geom; it is unclear whether `geom = "tile"` is accepted here —
# confirm against the installed ggplot2 version.
d + geom_bar(
  aes(y = 1, fill = ..count..),
  binwidth = 0.1,
  geom = "tile",
  position = "identity"
)



# Basic plot types, all drawn from the same three-row data frame
df <- data.frame(
  x = c(3, 1, 5),
  y = c(2, 4, 6),
  label = c("a", "b", "c")
)
p <- ggplot(df, aes(x, y, label = label)) +
  xlab(NULL) +
  ylab(NULL)

# Scatter plot
p + geom_point() + ggtitle("geom_point")

# Bar chart
p + geom_bar(stat = "identity") + ggtitle("geom_bar(stat=\"identity\")")

# Line plot
p + geom_line() + ggtitle("geom_line")

# Area plot
p + geom_area() + ggtitle("geom_area")

# Path plot
p + geom_path() + ggtitle("geom_path")

# Text labels
p + geom_text() + ggtitle("geom_text")

# Tile plot (heat map / level plot)
p + geom_tile() + ggtitle("geom_tile")

# Polygon plot
p + geom_polygon() + ggtitle("geom_polygon")

# Techniques for plotting distributions
depth_dist <- ggplot(diamonds, aes(depth)) + xlim(58, 68)

# Density histogram of depth, one facet row per cut.
depth_dist +
  geom_histogram(aes(y = ..density..), binwidth = 0.1) +
  facet_grid(cut ~ .)

# Distribution by cut: filled histogram (position = "fill") ...
depth_dist +
  geom_histogram(aes(fill = cut), binwidth = 0.1, position = "fill")

# ... and density frequency polygons coloured by cut.
depth_dist +
  geom_freqpoly(aes(y = ..density.., colour = cut), binwidth = 0.1)

# Controlling overplotting with point shape and size
df <- data.frame(x = rnorm(2000), y = rnorm(2000))
norm <- ggplot(df, aes(x, y))

norm + geom_point()
norm + geom_point(shape = 1)
norm + geom_point(shape = ".")  # each point is drawn as a single pixel

# Controlling overplotting with transparency
norm + geom_point(colour = alpha("black", 1/3))
norm + geom_point(colour = alpha("black", 1/5))
norm + geom_point(colour = alpha("black", 1/10))

# Jittering
td <- ggplot(diamonds, aes(table, depth)) +
  xlim(50, 70) +
  ylim(50, 70)

td + geom_point()
td + geom_jitter()

# Jitter with an explicit width of 0.5, then with increasing transparency.
jit <- position_jitter(width = 0.5)
td + geom_jitter(position = jit)
td + geom_jitter(position = jit, colour = alpha("black", 1/3))
td + geom_jitter(position = jit, colour = alpha("black", 1/5))
td + geom_jitter(position = jit, colour = alpha("black", 1/20))

# Exercise: draw every city of the us.cities dataset on a map of the USA,
# encode the population in a suitable way (e.g. point diameter) and handle
# overplotting so that the result is more complete, sensible and attractive.
library(maps)
data(us.cities)

# Keep only the cities with longitude greater than -130.
big_cities <- subset(us.cities, long > -130)
ggplot(big_cities, aes(long, lat)) +
  borders("state", size = 0.5, colour = "grey70") +
  geom_point(colour = "black", alpha = 0.5, aes(size = pop))

# Second version: all cities, with the data passed to the point layer.
library(ggplot2)
library(maps)
data(us.cities)

p <- ggplot(us.cities, aes(long, lat)) +
  borders("state", colour = "grey70")
p + geom_point(
  aes(long, lat, size = pop),
  data = us.cities,
  colour = "black",
  alpha = 0.5
)

# Faceting with facet_grid
mpg2 <- subset(mpg, cyl != 5 & drv %in% c("4", "f"))

qplot(cty, hwy, data = mpg2) + facet_grid(. ~ cyl)
qplot(cty, data = mpg2, geom = "histogram", binwidth = 2) +
  facet_grid(cyl ~ .)
# Facet by the combination of two variables.
qplot(cty, hwy, data = mpg2) + facet_grid(drv ~ cyl)

# Margins
p <- qplot(displ, hwy, data = mpg2) +
  geom_smooth(method = "lm", se = FALSE)
p + facet_grid(cyl ~ drv)
# Same facets with margins switched on.
p + facet_grid(cyl ~ drv, margins = TRUE)
qplot(displ, hwy, data = mpg2) +
  geom_smooth(aes(colour = drv), method = "lm", se = FALSE) +
  facet_grid(cyl ~ drv, margins = TRUE)

# NOTE(review): `movies` is not created or loaded in the visible part of this
# file — confirm where it comes from before running.
# Round each year down to its decade.
movies$decade <- floor(movies$year / 10) * 10
qplot(
  rating, ..density..,
  data = subset(movies, decade > 1890),
  geom = "histogram",
  binwidth = 0.5
) +
  facet_wrap(~ decade, ncol = 6)

# Working with dates on the x axis
library(ggplot2)
library(Rmisc)
library(scales)
data(economics)

q <- ggplot(economics, aes(x = date, y = psavert)) +
  ylab("Personal savings rate")

# Breaks every 5 years, labelled with the year.
q1 <- q + geom_line() +
  scale_x_date(
    breaks = seq(as.Date('1967-01-01'), as.Date('2007-01-01'), by = "5 years"),
    labels = date_format("%Y")
  )

# Breaks every 10 years, labelled with the year.
q2 <- q + geom_line() +
  scale_x_date(
    breaks = seq(as.Date('1967-01-01'), as.Date('2007-01-01'), by = "10 years"),
    labels = date_format("%Y")
  )

# Show only the given date range.
q3 <- q + geom_line() +
  scale_x_date(
    labels = date_format("%Y-%m-%d"),
    limits = as.Date(c("2004-01-01", "2005-01-01"))
  )

# Draw the three plots together in three columns.
t <- multiplot(q1, q2, q3, cols = 3)

# Today and the 29 days before it.
last_month <- Sys.Date() - 0:29
df <- data.frame(
  date = last_month,
  price = runif(30)
)
base <- ggplot(df, aes(date, price)) + geom_line()

# Labels per day; alternative: base + scale_x_date(date_labels = "%m %d")
base + scale_x_date(date_labels = "%b %d")

# One break per week.
base + scale_x_date(date_breaks = "1 week", date_labels = "%W")

# Minor breaks every day.
base + scale_x_date(date_minor_breaks = "1 day")

# Set limits: from 7 days ago onwards.
base + scale_x_date(limits = c(Sys.Date() - 7, NA))

# Polar and other coordinate systems: coord_xxx()
# Switching themes with theme_set(): theme_bw() is the black-and-white theme,
# theme_gray() the grey-background theme.
previous_theme <- theme_set(theme_bw())

hgramt <- qplot(psavert, data = economics, binwidth = 1)
hgramt
hgramt <- hgramt + ggtitle("This is a histogram")

# Title appearance
hgramt + theme(plot.title = element_text(size = 20))
hgramt + theme(plot.title = element_text(size = 20, color = "red"))
hgramt + theme(plot.title = element_text(size = 20, hjust = 0))       # title at the far left
hgramt + theme(plot.title = element_text(size = 20, face = "bold"))   # bold title
hgramt + theme(plot.title = element_text(size = 20, angle = 180))     # title turned by 180 degrees

# Grid lines and axis lines
hgramt + theme(panel.grid.major = element_line(colour = "red"))
hgramt + theme(panel.grid.major = element_line(size = 2))
hgramt + theme(panel.grid.major = element_line(linetype = "dotted"))  # dotted lines
hgramt + theme(axis.line = element_line())
hgramt + theme(axis.line = element_line(colour = "red"))
hgramt + theme(axis.line = element_line(size = 0.5, linetype = "dashed"))

# Backgrounds
hgramt + theme(plot.background = element_rect(fill = "grey80", colour = NA))
hgramt + theme(plot.background = element_rect(size = 2))
hgramt + theme(plot.background = element_rect(colour = "red"))
hgramt + theme(panel.background = element_rect())
hgramt + theme(panel.background = element_rect(colour = NA))
hgramt + theme(panel.background = element_rect(linetype = "dotted"))

# element_blank() removes an element from the plot
last_plot() + theme(panel.grid.minor = element_blank())
last_plot() + theme(panel.grid.major = element_blank())
last_plot() + theme(panel.background = element_blank())
last_plot() + theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)
# Draw the axis lines with element_line(), the same element function this file
# uses for axis.line elsewhere; the original call used the obsolete
# theme_segment() name.
last_plot() + theme(axis.line = element_line())

# theme_update() changes elements of the current theme; its return value is
# kept so the earlier theme can be restored afterwards.
old_theme <- theme_update(
  plot.background = element_rect(fill = "#3366FF"),
  panel.background = element_rect(fill = "#003DF5"),
  axis.text.x = element_text(colour = "#CCFF33"),
  axis.text.y = element_text(colour = "#CCFF33", hjust = 1),
  axis.title.x = element_text(colour = "#CCFF33", face = "bold"),
  axis.title.y = element_text(colour = "#CCFF33", face = "bold", angle = 90)
)

# Two plots drawn with the updated theme.
qplot(cut, data = diamonds, geom = "bar")
qplot(cty, hwy, data = mpg)

# Put the saved theme back.
theme_set(old_theme)

# The ggthemes package: a collection of alternative plot styles
library("ggthemes")

# Classic ggplot2 look
ggplot(mtcars, aes(x = mpg, y = wt, size = cyl, colour = factor(gear))) +
  geom_point(alpha = .5) +                # transparency
  scale_size_area() +                     # point area proportional to the value
  scale_colour_brewer(palette = "Set1") +
  ggtitle("Motor Trend Car Road Tests")

# Excel look
# The points are mapped to `colour`, so the colour scale is the one that takes
# effect (the original used the fill scale, which has nothing to act on here).
ggplot(mtcars, aes(x = mpg, y = wt, size = cyl, colour = factor(gear))) +
  geom_point() +
  theme_excel() +
  scale_colour_excel() +
  ggtitle("Motor Trend Car Road Tests")

# Economist look
ggplot(mtcars, aes(x = mpg, y = wt, size = cyl, colour = factor(gear))) +
  geom_point(alpha = .5) +
  theme_economist() +
  scale_colour_economist() +
  ggtitle("Motor Trend Car Road Tests")

# Dark look
ggplot(mtcars, aes(x = mpg, y = wt, size = cyl, colour = factor(gear))) +
  geom_point() +
  theme_solarized(light = FALSE) +
  scale_colour_solarized("red") +
  ggtitle("Motor Trend Car Road Tests")

# Writing plots to a file
# ggsave(): the argument is spelled out as `filename` instead of relying on
# partial matching of `file`.
ggsave(filename = "output.pdf")

# Alternative: open a PDF device, draw into it, then close it.
pdf(file = "output.pdf", width = 6, height = 6)
qplot(mpg, wt, data = mtcars)
qplot(wt, mpg, data = mtcars)
dev.off()  # close the graphics device



# Several plots on one page: viewport() creates the drawing regions.
# First build three plots; the outer parentheses also print each of them.
(a <- qplot(date, unemploy, data = economics, geom = "line"))

(b <- qplot(uempmed, unemploy, data = economics) +
  geom_smooth(se = FALSE))

(c <- qplot(uempmed, unemploy, data = economics, geom = "path"))

library(grid)

# Positions and sizes below are in npc units (per the original note):
# (0, 0) is the bottom-left corner, (1, 1) the top-right corner and
# (0.5, 0.5) the centre of the window.

# A viewport that takes up the entire plot device, first with every argument
# written out, then with no arguments at all.
vp1 <- viewport(x = 0.5, y = 0.5, width = 1, height = 1)
vp1 <- viewport()

# A viewport that takes up half the width and half the height, located in the
# middle of the plot; again in a long and a short form.
vp2 <- viewport(x = 0.5, y = 0.5, width = 0.5, height = 0.5)
vp2 <- viewport(width = 0.5, height = 0.5)

# A viewport that is 2 cm wide and 3 cm high, located in the centre.
vp3 <- viewport(
  width = unit(2, "cm"),
  height = unit(3, "cm")
)

# Inset example: draw b on the page, then print c inside a smaller viewport.
pdf("polishing-subplot-1.pdf", width = 4, height = 4)
# Viewport for the inset: 0.4 x 0.4 of the page, centred at (0.75, 0.35).
subvp <- viewport(x = 0.75, y = 0.35, width = 0.4, height = 0.4)
b                     # main plot
print(c, vp = subvp)  # c is printed into the sub-viewport
dev.off()

# Refinement: base theme at size 9, no axis titles and zero plot margins for
# the inset.
csmall <- c +
  theme_gray(9) +
  labs(x = NULL, y = NULL) +
  theme(plot.margin = unit(rep(0, 4), "lines"))

pdf("polishing-subplot-2.pdf", width = 4, height = 4)
b
print(csmall, vp = subvp)
dev.off()

# Explicit page layout: grid.layout() describes a rectangular grid of cells.
pdf("polishing-layout.pdf", width = 8, height = 6)
grid.newpage()  # start a new page
# Push a viewport whose layout divides the page into a 2 x 2 grid.
pushViewport(
  viewport(layout = grid.layout(2, 2))
)

# Build a viewport for the layout cell(s) at row(s) `x` and column(s) `y`.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}

# Plot a goes into row 1 across columns 1 and 2; b and c go into row 2.
print(a, vp = vplayout(1, 1:2))
print(b, vp = vplayout(2, 1))
print(c, vp = vplayout(2, 2))
dev.off()

# Plotting function curves
# The data frame only supplies the x range (-3 to 3).
p <- ggplot(data.frame(x = c(-3, 3)), aes(x = x))

# stat_function() evaluates the given function; dnorm() is the normal
# probability density function.
p + stat_function(fun = dnorm)

# Curve used in the plot example: returns 1 / (1 + exp(-xvar + 10)) for each
# element of `xvar`.
myfun <- function(xvar) {
  exponent <- -xvar + 10
  1 / (1 + exp(exponent))
}

# Draw myfun over the x range 0 to 20.
ggplot(
  data.frame(x = c(0, 20)),
  aes(x = x)
) +
  stat_function(fun = myfun)

# Shading the area under a curve (the curved trapezoid familiar from calculus)
# Returns dnorm(x) where x lies in [0, 2] and NA for every other x, so that
# only that part of the curve is drawn.
dnorm_limit <- function(x) {
  outside <- x < 0 | x > 2  # region that must not be drawn
  density <- dnorm(x)
  density[outside] <- NA
  density
}

# ggplot() with dummy data that only fixes the x range
p <- ggplot(data.frame(x = c(-3, 3)), aes(x = x))

# First layer: shaded area from dnorm_limit; second layer: the full dnorm curve.
p +
  stat_function(fun = dnorm_limit, geom = "area", fill = "blue", alpha = 0.2) +
  stat_function(fun = dnorm)



# Making a plot move (animated graphics)
# This is not a ggplot2 feature; it uses the rgl package.
library(rgl)

# 3D plot of wt, disp and mpg from mtcars.
plot3d(
  mtcars$wt, mtcars$disp, mtcars$mpg,
  type = "s",
  size = 0.75,
  lit = FALSE
)

# Spin the scene.
play3d(spin3d())

# Other plotting packages:
# Lattice
# Grid

# Scatter plot with a straight line of intercept 2439 and slope -3 added.
# NOTE(review): `dd` is not defined in the visible part of this file —
# confirm where it comes from.
ggplot(dd, aes(dx, d1)) +
  geom_point() +
  geom_abline(intercept = 2439, slope = -3)

# Several series in one plot: same x, different y columns.
# Example input data frame `x`, as printed at the console:
#    x1  x2  x3  x4
# 1   1  45  46  43
# 2   2  26  27  24
library("reshape2")

# Reshape to long format, keeping x1 as the id column.
data1 <- melt(x, id.vars = "x1")

# y = value and color = variable: each original column becomes its own line.
ggplot(data1, aes(x = x1, y = value)) +
  geom_line(aes(color = variable)) +
  geom_point()

# Points also differ by colour, shape and size per series.
ggplot(data1, aes(x = x1, y = value)) +
  geom_line(aes(color = variable)) +
  geom_point(aes(color = variable, shape = variable, size = variable))

# Original note: shape has to be a factor; size may be numeric or a factor.
ggplot(data1, aes(x = x1, y = value)) +
  geom_line(aes(color = variable)) +
  geom_point(aes(color = variable, size = 2))

# Same plot with a smoother, followed by the optional add-ons. The add-ons
# were written as separate lines starting with `+`, which R cannot attach to
# the plot; they are chained into one expression here.
ggplot(data1, aes(x = x1, y = value)) +
  geom_line(aes(color = variable)) +
  geom_point(aes(color = variable, size = 2)) +
  geom_smooth() +
  # Text labels showing the row names; hjust / vjust shift the label
  # horizontally / vertically. To show the y value instead use
  # label = data1$value.
  geom_text(hjust = 0, vjust = -1, alpha = 0.8, label = rownames(data1)) +
  # y axis range
  scale_y_continuous(limits = c(0, max(data1$value) * 1.1)) +
  # x axis range; alternative: limits = c(0, max(data1$x1) * 2.1)
  scale_x_continuous(limits = c(0, 5)) +
  # axis titles and plot title
  labs(x = "时间顺序", y = "流量值", title = "访问流量的时间序列图") +
  theme(
    # axis titles: size and direction (angle 0 is horizontal, 90 vertical)
    axis.title.y = element_text(size = 12 * 1.33, angle = 90),
    axis.title.x = element_text(size = 12 * 1.33, angle = 0),
    # axis tick labels: direction, justification, size and colour
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 12 * 1.33, color = "black"),
    axis.text.y = element_text(angle = 0, hjust = 1, size = 12 * 1.33, color = "black")
  )



# colors() prints all colour names.
#
# Example rows of the report data plotted below:
# orderID  from_ent_level   code_count
#       1       生产出库        325920
#       3              1         20280
#       4              2         50760
#       5              3          7813
#       6              4           620
#       9       终端购进         13292
#
# NOTE(review): vDrugPkgSpec, df_report_1_toPPT2, label_Y and aex_Y are not
# defined in the visible part of this file, and `comma` presumably comes from
# the scales package — confirm before running.

# y axis title (plain `<-`; the original `<<-` is not needed at top level)
lab_Y <- paste("出库量(万", vDrugPkgSpec, ")\n", sep = "")

# Plot data; the y column is referenced by name instead of df$column inside
# aes().
p1 <- ggplot(data = df_report_1_toPPT2, aes(x = from_ent_level, y = code_count))

# Bar layer: bar width and colour.
p1 <- p1 + geom_bar(stat = "identity", width = 0.5, fill = "cornflowerblue")

# Numbers above the bars and their colour.
p1 <- p1 + geom_text(label = label_Y, colour = "blue", vjust = -1)

# Axis titles and plot title.
p1 <- p1 + labs(
  x = "\n生产、各流通级别、终端",
  y = lab_Y,
  title = "生产、各流通级别、终端流通量和纯销比\n"
)

# Axis tick labels: direction, justification, size and colour.
p1 <- p1 + theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = 12 * 1.33, color = "black"))
p1 <- p1 + theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 12 * 1.33, color = "black"))

# Axis titles: size and direction (angle 0 is horizontal, 90 vertical).
p1 <- p1 + theme(axis.title.x = element_text(size = 12 * 1.33, angle = 0))
p1 <- p1 + theme(axis.title.y = element_text(size = 12 * 1.33, angle = 90))

# y axis range and label format.
p1 <- p1 + scale_y_continuous(limits = c(0, max(aex_Y) * 1.1), labels = comma)

# Plot title size and weight.
p1 <- p1 + theme(plot.title = element_text(size = 16 * 1.33, face = "bold"))