geom_bar
# Bar chart of the number of diamonds in each `cut` level.
ggplot(diamonds, aes(x = cut)) +
  geom_bar()
##Levels that don't have any values are omitted. Add `scale_x_discrete(drop = FALSE)` to force them to be displayed
## Show the proportion of each `cut` on the y-axis.
## `group = 1` makes the proportions be computed over all bars together
## rather than within each bar.
## `after_stat(prop)` replaces the `..prop..` notation, which is deprecated
## in recent ggplot2 releases.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = after_stat(prop), group = 1))
## Stack the bars by `clarity`: each `cut` bar is split into filled segments.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar()
#Add `position = "fill"` outside `aes()` to stretch every bar to full height, so proportions can be compared
#Add `position = "dodge"` outside `aes()` to place the `clarity` bars side by side within each `cut`
geom_line
# geom_line() needs two positional variables (x and y).
# Typical workflow: tidy the data first, then plot it.
table2 %>%
  pivot_wider(names_from = "type", values_from = "count") %>%
  ggplot(aes(x = year, y = cases)) +
  geom_line(aes(group = country)) +
  geom_point(aes(colour = country)) +
  scale_x_continuous(breaks = unique(table2$year))
# When there is no third (grouping) variable, set `group = 1`.
ggplot(dest_delay, aes(x = dest, y = avg_dep_delay, group = 1)) +
  geom_line()
geom_histogram
# Histogram of `carat` using bins that are 0.2 wide.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.2)
##This can be used to find clusters
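#A categorical variable can be mapped here as well, e.g. `fill = color`
#Add `y = after_stat(density)` (formerly written `y = ..density..`) inside aes() to put probability density on the y-axis; its values can exceed 1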
# Keep only the diamonds under 3 carats, then redraw the histogram on that subset.
smaller <- filter(diamonds, carat < 3)
ggplot(smaller) +
  geom_histogram(aes(x = carat), binwidth = 0.2)
# Number of observations in each 0.2-wide `carat` bin:
count(diamonds, cut_width(carat, 0.2))
geom_freqpoly
# geom_freqpoly() takes exactly one positional variable.
# It shows how the counts of a binned continuous variable are distributed
# within each group.
## One line per `cut`:
ggplot(diamonds, aes(x = price, color = cut)) +
  geom_freqpoly(binwidth = 500)
## Put density instead of count on the y-axis, because the group sizes
## differ a lot.
## `after_stat(density)` replaces the deprecated `..density..` notation.
ggplot(diamonds) +
  geom_freqpoly(
    aes(x = price, y = after_stat(density), color = cut),
    binwidth = 500
  )
## Compare two groups defined by whether a variable is NA
## (here: flights with and without a recorded `dep_time`).
## `after_stat(density)` replaces the deprecated `..density..` notation.
nycflights13::flights %>%
  mutate(
    cancelled = is.na(dep_time),
    sched_hour = sched_dep_time %/% 100, # x %/% y is integer division: 5 %/% 2 is 2
    sched_min = sched_dep_time %% 100, # x %% y is the modulus (x mod y): 5 %% 2 is 1
    sched_dep_time = sched_hour + sched_min / 60 # hours plus fraction of an hour
  ) %>%
  ggplot() +
  geom_freqpoly(
    aes(x = sched_dep_time, y = after_stat(density), color = cancelled),
    binwidth = 1 / 4
  )
#`color = displ > 5` can also categorize a variable
geom_tile
## Number of observations for each (color, cut) combination.
count(diamonds, color, cut)
## Tile plot: one square per (color, cut) combination, with the count `n`
## mapped to the fill colour.
diamonds %>%
  count(color, cut) %>%
  ggplot(aes(x = color, y = cut, fill = n)) +
  geom_tile()
## Avoid gaps in the grid: add the missing (class, drv) combinations with
## n = 0 so that no tile is left empty.
mpg %>%
  count(class, drv) %>%
  complete(class, drv, fill = list(n = 0)) %>%
  ggplot(aes(x = class, y = drv, fill = n)) +
  geom_tile()
## Use geom_tile() with `carat`: bin it into width-1 groups first, then
## count each (carat_group, cut) combination.
diamonds %>%
  mutate(carat_group = cut_width(carat, width = 1)) %>%
  count(carat_group, cut) %>%
  ggplot(aes(x = carat_group, y = cut, fill = n)) +
  geom_tile()
#Store the result of `cut_width()` in a named column (e.g. with `mutate()`) so it can be referred to by name afterwards
geom_point and geom_smooth
#geom_smooth() accepts a `group` aesthetic; set `group = drv` to draw a separate line for each group while retaining the original colour
# Draw smooth lines for selected classes only, by giving geom_smooth() its own
# filtered data. `se = FALSE` hides the confidence interval; `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
ggplot(mpg, aes(displ, hwy, color = class)) +
  geom_point() +
  geom_smooth(data = filter(mpg, class == "subcompact"), se = FALSE)
ggplot(mpg, aes(displ, hwy, color = class)) +
  geom_point() +
  geom_smooth(
    data = filter(mpg, class %in% c("compact", "subcompact")),
    se = FALSE
  )
##Add `position = "jitter"` to `geom_point()` in order to shake the points and avoid overlapping
geom_boxplot
## Boxplots of `price` for each 0.1-wide `carat` bin:
ggplot(smaller, aes(x = carat, y = price)) +
  geom_boxplot(aes(group = cut_width(carat, 0.1)))
# Add `varwidth = TRUE` so the box widths reflect how many observations each
# group has. `TRUE` is spelled out because `T` can be reassigned.
ggplot(smaller) +
  geom_boxplot(
    aes(carat, price, group = cut_width(carat, 0.1)),
    varwidth = TRUE
  )
## Boxplots of `hwy` for each `class`.
# reorder() sorts the classes by their median `hwy`, from the smallest on the
# left to the largest on the right.
ggplot(mpg, aes(x = reorder(class, hwy, FUN = median), y = hwy)) +
  geom_boxplot()
# Bin `carat`, order the bins by median `price`, and draw one boxplot per bin.
smaller %>%
  mutate(temp = cut_width(carat, 0.1)) %>%
  ggplot(aes(x = reorder(temp, price, FUN = median), y = price, group = temp)) +
  geom_boxplot() +
  coord_flip() # flip the axes so that long labels do not get in the way
Rename
+ labs(x = "Month", y = "Destination", fill = "Departure Delay",title = "The Platform")
Data Processing
filter(between(y, 3, 20)) # drop the observations whose `y` is outside the range 3 to 20
mutate(y = ifelse(y < 3 | y > 20, NA, y)) # ifelse(test, yes, no) gives `yes` where test is TRUE, otherwise `no`; here out-of-range `y` becomes NA instead of the row being dropped
facet_*
# Scatter plot of `hwy` against `displ`, used as the base for faceting.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()
#add `facet_wrap(~class, nrow = 2)` to split the plot into one panel per `class`, arranged in 2 rows
#add `facet_grid(drv ~ cyl)` to split the plot by every combination of `drv` and `cyl`; use `facet_grid(. ~ cyl)` to facet by column only and `facet_grid(cyl ~ .)` to facet by row only
coord_*
coord_flip #swap the x and y axes
coord_polar #switch the plot to polar coordinates; usually paired with geom_bar()
coord_cartesian #zoom in, for example `coord_cartesian(ylim = c(0, 5))`
scale_x_continuous
scale_x_continuous(breaks = unique(table2$year)) #put an x-axis break at each distinct `year` in table2 so the axis values are shown neatly
Function:
geom_point() draw points
geom_smooth() draw smooth lines(`se=F`:conceal the confidence interval)
geom_line() draw straight lines(`group`:divide observations in groups and draw a plot for each)
geom_freqpoly() draw polygonal lines to show the counts of one CO, i.e. continuous variable (`binwidth`: the width of each bin)
geom_bar() draw bars to show the counts of one CA, i.e. categorical variable (`fill`: the colour that fills the bars, for example `fill = "blue"`)
geom_histogram() draw bars to show the counts of one CO (`binwidth`: the width of each bin)
geom_boxplot() draw boxplots to show how one CO varies with one CA (`varwidth = TRUE` makes the box widths reflect the number of observations in each group), or how two COs relate when combined with `group = cut_width()`
geom_tile() draw "phoenix" square plot