基础常见的条形图

今天已进入24节气中的霜降,《二十四节气解》中说:“气肃而霜降,阴始凝也。”可见“霜降”表示天气逐渐变冷,开始降霜。北京的天气也格外应景,从早上就开始淅淅沥沥下雨,呆在屋里,甚至窝在被窝里,都感觉凉凉的。时间过得真快,刚毕业的时候还是三伏天的夏季,转眼又在北京迎来了冬季~

条形图是一种常见的数据可视化方式,常用于表示分类变量(x)对应的数值(y)。这里以ggplot2为例,展示如何绘制简单的bar plot、如何设置其他图形参数,以及一些进阶的bar plot示例。

ggplot2中绘制条形图的函数有 geom_col() 和 geom_bar():geom_col() 直接以数据中的 y 值作为条形高度;geom_bar() 默认统计每个类别的观测数作为条形高度。

条形图的填充色,边框色,背景色和背景框线的设置

1
2
3
4
5
6
7
8
9
10
11
# Load the plotting package and the package that ships the pg_mean example
# data; without these two calls the snippets below cannot run on their own.
library(ggplot2)
library(gcookbook)

## Basic bar chart (left figure): one bar per group, bar height taken from weight
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col()

## Same chart with custom fill, outline, background and grid lines (right figure)
ggplot(pg_mean, aes(x = group, y = weight)) +
  # fill sets the bar interior colour, colour sets the bar outline colour
  geom_col(fill = "lightblue", colour = "black") +
  theme_bw() + # switch to the black-and-white theme (no grey panel background)
  theme(
    # remove the major grid lines in both directions
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank()
  )

填充色设置和变量排序

非默认的填充色可以借助 scale_fill_brewer() 或 scale_fill_manual() 设置;变量的排序使用 reorder() 函数,这里按 Change 的数值大小对 Abb 排序。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
## Prepare the input data
library(gcookbook) # provides the uspopchange data set
library(dplyr)

# Sort by Change in descending order and keep the first ten rows
upc <- uspopchange %>%
  arrange(desc(Change)) %>%
  slice(seq_len(10))

## Map the fill aesthetic to Region so that bars are coloured by Region
ggplot(
  data = upc,
  mapping = aes(x = Abb, y = Change, fill = Region)
) +
  geom_col()

## Choose the fill colours by hand with scale_fill_manual()
## (scale_fill_brewer() is the palette-based alternative) and order the
## bars along the x axis with reorder(Abb, Change)
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_col(colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  labs(x = "State")

不同颜色表示正负数值的变化

1
2
3
# climate_sub is not defined anywhere in this script, so build it here.
# NOTE(review): reconstructed from the R Graphics Cookbook recipe this example
# follows (Berkeley series from 1900 onwards) -- confirm this is the intended
# subset. Relies on gcookbook (climate) and dplyr already being loaded earlier
# in this script.
climate_sub <- climate %>%
  filter(Source == "Berkeley" & Year >= 1900) %>%
  mutate(pos = Anomaly10y >= 0) # TRUE for non-negative anomalies

# Fill the bars by the sign flag so positive and negative values get
# different colours.
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  # colour = outline colour, size = outline width.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept so the snippet still runs on older ggplot2 versions.
  geom_col(position = "identity", colour = "black", size = 0.25) +
  # Manual fill colours; guide = "none" hides the legend
  # (guide = FALSE is deprecated in current ggplot2).
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

一对变量,调整bar的宽度和空间

1
2
3
4
5
6
7
8
9
## Two variables at once: bars for each Cultivar are placed side by side
## ("dodge") within every Date
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(colour = "black", position = "dodge") +
  scale_fill_brewer(palette = "Pastel1")

## Narrower bars (width = 0.5) with a custom dodge distance (0.7);
## per the original note, the default is 0.9
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(
    position = position_dodge(0.7),
    width = 0.5,
    colour = "black"
  ) +
  scale_fill_brewer(palette = "Pastel1")

堆积图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
## Stacked bars: position_stack(reverse = ...) controls the stacking order of
## the segments, guide_legend(reverse = TRUE) flips the order of the legend keys
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(colour = "black", position = position_stack(reverse = FALSE)) +
  scale_fill_brewer(palette = "Pastel1") +
  guides(fill = guide_legend(reverse = TRUE))

## Proportional stacked bars: position = "fill" shows each Cultivar as a share
## of its bar, so every stacked bar has a total height of 1
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(colour = "black", position = "fill") +
  scale_fill_brewer(palette = "Pastel1") +
  guides(fill = guide_legend(reverse = TRUE))

## Proportional stacked bars with the y axis labelled as percentages
## via scale_y_continuous(labels = scales::percent)
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "fill", colour = "black") +
  scale_fill_brewer(palette = "Pastel1") +
  scale_y_continuous(labels = scales::percent)

## Add text labels with geom_text()
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  # geom_col() stacks the bars by default, while geom_text() defaults to the
  # identity position. Stack the labels as well so that each Weight label sits
  # on its own bar segment instead of at the raw Weight value; vjust = 2 then
  # pushes the text down from the top edge of that segment.
  geom_text(aes(label = Weight), position = position_stack(), vjust = 2)

圈圈图 Dot plot

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
## Figure 1: basic dot plot drawn with geom_point()
library(gcookbook) # provides the tophitters2001 data set

# Work with the first 25 rows of tophitters2001 only
tophit <- head(tophitters2001, n = 25)

ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

## Figure 2: order the names by avg with reorder()
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # larger dots than the default
  theme_bw() +
  theme(
    # keep only dashed grey horizontal guide lines; drop the vertical grid
    panel.grid.major.y = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

## Figure 3: swap the axes -- names on x, avg on y
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # larger dots than the default
  theme_bw() +
  theme(
    # rotate the x axis labels by 60 degrees
    axis.text.x = element_text(angle = 60, hjust = 1),
    # keep only dashed grey vertical guide lines; drop the horizontal grid
    panel.grid.major.x = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

## Figure 4: colour the points by lg
# Names sorted by lg first and by avg within each lg
nameorder <- with(tophit, name[order(lg, avg)])

# Convert name to a factor whose levels follow nameorder, which fixes the
# order in which the names are drawn on the y axis
tophit$name <- factor(tophit$name, levels = nameorder)

ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  # grey segment for each name, running to x = 0
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(aes(colour = lg), size = 3) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    legend.justification = c(1, 0.5),
    legend.position = c(1, 0.55), # place the legend inside the plot area
    panel.grid.major.y = element_blank() # no horizontal grid lines
  )

## Figure 5: one facet row per lg, using facet_grid()
ggplot(tophit, aes(x = avg, y = name)) +
  # grey segment for each name, running to x = 0
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  # guide = "none" hides the colour legend
  # (guide = FALSE is deprecated in current ggplot2)
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) + # no horizontal grid lines
  # free y scales and free panel heights, so each facet only shows its own names
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")