基础常见的条形图

今天已进入24节气中的霜降，《二十四节气解》中说：“气肃而霜降，阴始凝也。”可见“霜降”表示天气逐渐变冷，开始降霜。北京的天气也格外应景，从早上就开始淅淅沥沥下雨，呆在屋里，甚至窝在被窝里，都感觉凉凉的。时间过得真快，刚毕业的时候还是三伏天的夏季，转眼又在北京迎来了冬季～

条形图是一种常见的数据可视化方式，常用于表示分类变量（x）对应的数值（y）。这里以ggplot2为例，展示如何绘制简单的bar plot、如何设置其他图形参数，以及进阶的bar plot示例。

ggplot2中绘制条形图的函数有geom_col()和geom_bar()：geom_col()直接以数据中的y值作为条形高度，geom_bar()默认统计每个x类别的行数作为条形高度。

条形图的填充色，边框色，背景色和背景框线的设置

## Load ggplot2 for plotting and gcookbook for the pg_mean data set;
## without these the snippets below fail when run from the top of the file.
library(ggplot2)
library(gcookbook)

## Simple bar chart (left panel): one bar per group, bar height taken from weight
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col()

## Styled bar chart (right panel): custom fill, outline, background and grid lines
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_col(fill = "lightblue", colour = "black") + # fill = bar colour, colour = bar outline
  theme_bw() + # white panel background instead of the default grey
  theme(
    panel.grid.major.x = element_blank(), # remove the major grid lines
    panel.grid.major.y = element_blank()
  )

填充色设置和变量排序

非默认的填充色可以借助scale_fill_brewer()或scale_fill_manual()设置；变量的排序使用reorder()函数，这里基于Change的值对Abb进行排序。

## Data preparation: keep the ten rows of uspopchange with the largest Change
library(gcookbook) # provides the uspopchange data set
library(dplyr)
upc <- uspopchange %>%
  arrange(desc(Change)) %>%
  slice(seq_len(10))

## Map Region to the fill aesthetic so each bar is coloured by its region
ggplot(data = upc, mapping = aes(Abb, Change, fill = Region)) +
  geom_col()

## Pick custom colours with scale_fill_manual() (scale_fill_brewer() is the
## palette-based alternative) and order the bars by Change via reorder()
ggplot(
  data = upc,
  mapping = aes(reorder(Abb, Change), Change, fill = Region)
) +
  geom_col(colour = "black") +
  labs(x = "State") +
  scale_fill_manual(values = c("#669933", "#FFCC66"))

不同颜色表示正负数值的变化

## Prepare the data: flag whether each 10-year anomaly is non-negative so the
## sign can drive the fill colour.
## NOTE(review): climate_sub was never defined in the original snippet (only
## line-number residue remained). This preparation follows the R Graphics
## Cookbook recipe the plot is based on; it relies on gcookbook (climate data
## set) and dplyr being loaded -- confirm the filter matches the intended figure.
climate_sub <- climate %>%
  filter(Source == "Berkeley" & Year >= 1900) %>%
  mutate(pos = Anomaly10y >= 0)

## Bars coloured by sign: position = "identity" keeps negative bars below zero.
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  # colour = outline colour, size = outline width
  # (ggplot2 >= 3.4.0 prefers `linewidth`; `size` is kept for older versions)
  geom_col(position = "identity", colour = "black", size = 0.25) +
  # guide = "none" hides the legend; `guide = FALSE` is deprecated since ggplot2 3.3.4
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

一对变量，调整bar的宽度和间距

## Shared pieces for the grouped (dodged) bar charts: Cultivar drives the fill,
## and both plots use the same Brewer palette.
cabbage_dodge_base <- ggplot(
  cabbage_exp,
  aes(x = Date, y = Weight, fill = Cultivar)
)
pastel_fill_scale <- scale_fill_brewer(palette = "Pastel1")

## Side-by-side bars: position = "dodge" places the bars of each fill group
## next to each other instead of stacking them
cabbage_dodge_base +
  geom_col(colour = "black", position = "dodge") +
  pastel_fill_scale

## Narrower bars (width = 0.5) with an explicit dodge width of 0.7;
## the default width is 0.9
cabbage_dodge_base +
  geom_col(position = position_dodge(0.7), width = 0.5, colour = "black") +
  pastel_fill_scale

堆积图

## Shared pieces for the stacked bar charts: same mapping, reversed legend
## order, same Brewer palette.
cabbage_stack_base <- ggplot(
  cabbage_exp,
  aes(x = Date, y = Weight, fill = Cultivar)
)
reversed_legend <- guides(fill = guide_legend(reverse = TRUE))
pastel_stack_fill <- scale_fill_brewer(palette = "Pastel1")

## Stacked bars: position_stack(reverse = ...) controls the stacking order and
## guide_legend(reverse = TRUE) flips the order of the legend entries
cabbage_stack_base +
  geom_col(colour = "black", position = position_stack(reverse = FALSE)) +
  pastel_stack_fill +
  reversed_legend

## Proportional stack: position = "fill" rescales every bar to a total of 1
cabbage_stack_base +
  geom_col(colour = "black", position = "fill") +
  pastel_stack_fill +
  reversed_legend

## Proportional stack with the y axis labelled as percentages
## (scales::percent formats the 0-1 proportions as 0%-100%)
ggplot(
  data = cabbage_exp,
  mapping = aes(Date, Weight, fill = Cultivar)
) +
  geom_col(position = "fill", colour = "black") +
  scale_fill_brewer(palette = "Pastel1") +
  scale_y_continuous(labels = scales::percent)

## Add text labels showing Weight on each stacked segment.
## geom_col() stacks the bars by default, so the labels must be stacked too;
## with the default identity position they would be drawn at the raw Weight
## values and miss the upper segments. position_stack(vjust = 0.5) centres
## each label inside its own segment.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col() +
  geom_text(aes(label = Weight), position = position_stack(vjust = 0.5))

圈圈图 Dot plot

## Figure 1: basic dot plot drawn with geom_point()
library(gcookbook) # provides the tophitters2001 data set

# Keep the first 25 rows of tophitters2001
tophit <- tophitters2001[seq_len(25), ]

ggplot(data = tophit, mapping = aes(avg, name)) +
  geom_point()

## Figure 2: order the names by avg with reorder()
ggplot(data = tophit, mapping = aes(avg, reorder(name, avg))) +
  geom_point(size = 3) + # larger dots
  theme_bw() +
  theme(
    # dashed horizontal guide lines only; vertical grid lines removed
    panel.grid.major.y = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  )

## Figure 3: swap the axes so names run along x and avg along y
ggplot(data = tophit, mapping = aes(reorder(name, avg), avg)) +
  geom_point(size = 3) + # larger dots
  theme_bw() +
  theme(
    # rotate the name labels so they do not overlap
    axis.text.x = element_text(hjust = 1, angle = 60),
    # dashed vertical guide lines only; horizontal grid lines removed
    panel.grid.major.x = element_line(linetype = "dashed", colour = "grey60"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank()
  )

## Figure 4: colour the points by lg
# Names ordered by lg first, then by avg within each lg
nameorder <- with(tophit, name[order(lg, avg)])

# Convert name to a factor whose levels follow nameorder, which fixes the
# order in which the names are drawn on the y axis
tophit[["name"]] <- factor(tophit[["name"]], levels = nameorder)

ggplot(data = tophit, mapping = aes(avg, name)) +
  # grey segment from each point back to x = 0
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(aes(colour = lg), size = 3) +
  scale_colour_brewer(limits = c("NL", "AL"), palette = "Set1") +
  theme_bw() +
  theme(
    legend.justification = c(1, 0.5),
    legend.position = c(1, 0.55),         # legend placed inside the plot area
    panel.grid.major.y = element_blank()  # no horizontal grid lines
  )

## Figure 5: one facet row per lg via facet_grid()
ggplot(tophit, aes(x = avg, y = name)) +
  # grey segment from each point back to x = 0
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  # guide = "none" hides the legend (the facets already separate the lg values);
  # `guide = FALSE` is deprecated since ggplot2 3.3.4
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) + # no horizontal grid lines
  # free y scales and free panel heights so each facet shows only its own names
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")