**map()** 类函数在很大程度上可以替代循环。需要注意的是,map() 默认按顺序逐个处理元素,本身并不进行多线程并行计算;它的优势在于让代码更简洁、意图更清晰,并且可以通过 map_lgl()、map_chr()、map_dbl() 等变体固定返回值的类型。
# Load purrr and define the helper functions and sample inputs used by the
# map() / map2() / pmap() examples.
library(purrr)

# Add one to the input.
fun <- function(x) {
  x + 1
}

# Subtract y from x.
funxy <- function(x, y) {
  x - y
}

x <- c(1, 5, 8, 9)     # atomic vector input
X <- list(1, 2, 6, 5)  # list input
y <- c(8, 4, 6, 2)
z <- c(1, 4, 8, 6)
x是vector的情况:
# map() calls fun once per element of the vector and returns a list.
res <- map(x, fun)
res
# [[1]]
# [1] 2
#
# [[2]]
# [1] 6
#
# [[3]]
# [1] 9
#
# [[4]]
# [1] 10
x是list的情况:
# The same call with a list input: fun is applied to each list element.
res <- map(X, fun)
res
# [[1]]
# [1] 2
#
# [[2]]
# [1] 3
#
# [[3]]
# [1] 7
#
# [[4]]
# [1] 6

# map2() walks x and y in parallel and calls funxy(x[i], y[i]).
res <- map2(x, y, funxy)
res
# [[1]]
# [1] -7
#
# [[2]]
# [1] 1
#
# [[3]]
# [1] 2
#
# [[4]]
# [1] 7
如果被映射的两个向量长度不同(且其中没有长度为1、可被自动循环补齐的向量)就会报错:
# Deliberate failure: x has 4 elements and y2 has 2, so map2() stops with an
# error and nothing is assigned to res by this call.
y2 <- c(8, 4)
res <- map2(x, y2, funxy)
res
# Error: Mapped vectors must have consistent lengths:
# * `.x` has length 4
# * `.y` has length 2
list与vector混合的情况:
# map2() accepts a list for one input and an atomic vector for the other.
res <- map2(X, y, funxy)
res
# [[1]]
# [1] -7
#
# [[2]]
# [1] -2
#
# [[3]]
# [1] 0
#
# [[4]]
# [1] 3

# Sum of three inputs.
funxyz <- function(x, y, z) {
  x + y + z
}

# pmap() takes a list of inputs; the list is unnamed here, so its elements
# are matched to funxyz's arguments by position.
I <- list(X, y, z)
res <- pmap(I, funxyz)
res
# [[1]]
# [1] 10
#
# [[2]]
# [1] 10
#
# [[3]]
# [1] 20
#
# [[4]]
# [1] 13
返回逻辑向量
# *_lgl variants: every call must return a single logical, and the results
# are collected into a logical vector instead of a list.
library(purrr)

fun <- function(x) {
  x > 1
}
funxy <- function(x, y) {
  x > y
}
funxyz <- function(x, y, z) {
  x > y - z
}

x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x, y, z)

res <- map_lgl(x, fun)
res
# [1] FALSE TRUE TRUE TRUE

res <- map2_lgl(x, y, funxy)
res
# [1] FALSE TRUE TRUE TRUE

# A data frame is a list of columns, so pmap_lgl() iterates over its rows and
# passes the columns x, y, z to funxyz by name.
res <- pmap_lgl(mydata, funxyz)
res
# [1] TRUE TRUE TRUE TRUE
返回字符串向量
# *_chr variants: every call must return a single string, and the results
# are collected into a character vector.
library(purrr)

fun <- function(x) {
  as.character(x)
}
funxy <- function(x, y) {
  paste0(as.character(x), as.character(y))
}
funxyz <- function(x, y, z) {
  paste0(as.character(x), as.character(y), as.character(z))
}

x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x, y, z)

res <- map_chr(x, fun)
res
# "1" "5" "8" "9"

res <- map2_chr(x, y, funxy)
res
# "18" "54" "86" "92"

res <- pmap_chr(mydata, funxyz)
res
# "189" "545" "864" "926"
返回的是一个双精度向量
# *_dbl variants: every call must return a single number, and the results
# are collected into a double vector.
library(purrr)

fun <- function(x) {
  x + 0.11
}
funxy <- function(x, y) {
  x - y + 0.69
}
funxyz <- function(x, y, z) {
  x - y - z - 0.554
}

x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x, y, z)

res <- map_dbl(x, fun)
res
# [1] 1.11 5.11 8.11 9.11

res <- map2_dbl(x, y, funxy)
res
# [1] -6.31 1.69 2.69 7.69

res <- pmap_dbl(mydata, funxyz)
res
# [1] -16.554 -4.554 -2.554 0.446
返回一个dataframe, 是由每个f(x)输出rbind后的结果,因此也需要f的返回是一个dataframe。
# *_dfr variants: each call returns a one-row data frame and the pieces are
# stacked by row into a single data frame.
library(purrr)

fun <- function(x) {
  data.frame(var1 = x + 0.11, var2 = x)
}
funxy <- function(x, y) {
  data.frame(var1 = x - y + 0.69, var2 = x)
}
funxyz <- function(x, y, z) {
  data.frame(var1 = x - y - z - 0.554, var2 = x + y + z)
}

x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x, y, z)

res <- map_dfr(x, fun)
res
# var1 var2
# 1 1.11 1
# 2 5.11 5
# 3 8.11 8
# 4 9.11 9

res <- map2_dfr(x, y, funxy)
res
# var1 var2
# 1 -6.31 1
# 2 1.69 5
# 3 2.69 8
# 4 7.69 9

res <- pmap_dfr(mydata, funxyz)
res
# var1 var2
# 1 -16.554 18
# 2 -4.554 14
# 3 -2.554 18
# 4 0.446 17
返回一个dataframe, 是由每个f(x)输出cbind后的结果,因此也需要f的返回是一个可以被cbind的输出。
# *_dfc variants: the per-call results are bound side by side (by column)
# into a single data frame.
# NOTE(review): the printed column names below are the output recorded by the
# original author; the exact names given to repeated columns may depend on the
# installed purrr/dplyr version -- confirm against your environment.
library(purrr)

fun <- function(x) {
  data.frame(var1 = x + 0.11, var2 = x)
}
# Returns a plain length-2 vector rather than a data frame; each call
# becomes one column of the result.
funxy <- function(x, y) {
  c(x - y + 0.69, x)
}
funxyz <- function(x, y, z) {
  data.frame(var1 = x - y - z - 0.554, var2 = x + y + z)
}

x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x, y, z)

res <- map_dfc(x, fun)
res
# var1 var2 var11 var21 var12 var22 var13 var23
# 1 1.11 1 5.11 5 8.11 8 9.11 9

res <- map2_dfc(x, y, funxy)
res
# # A tibble: 2 x 4
# V1 V2 V3 V4
# <dbl> <dbl> <dbl> <dbl>
# 1 -6.31 1.69 2.69 7.69
# 2 1 5 8 9

res <- pmap_dfc(mydata, funxyz)
res
# var1 var2 var11 var21 var12 var22 var13 var23
# 1 -16.554 18 -4.554 14 -2.554 18 0.446 17
map_if()中,.p是判断函数,.f是判断为真时执行的函数,.else是判断为假时执行的函数。map_at()中,.at可以是字符串(名称)或者数值索引,表示只对.x中这些位置的元素应用.f,其余元素原样返回;map_at()没有.p和.else参数。注意这里的作用单元是list或vector中的每个元素(对数据框而言是每一列),而非横向地从每个元素中取值执行。
# Conditional mapping with map_if() and positional mapping with map_at().
library(purrr)

# Predicate: TRUE when the values of the element sum to more than 8.
fun <- function(x) {
  sum(x) > 8
}

x <- c(1, 5, 8, 9)
y <- c(9, 8, 5, 3)
z <- c(-1, -2, 5, 3)
mydata <- data.frame(x = x, y = y, z = z)

# map_if(): apply .f to elements where .p is TRUE and .else where it is FALSE.
# The comparison is strict, so of 1, 5, 8, 9 only 9 passes the predicate.
res <- map_if(.x = x, .p = fun, .f = ~"right", .else = ~"wrong")
res
# [[1]]
# [1] "wrong"
#
# [[2]]
# [1] "wrong"
#
# [[3]]
# [1] "wrong"
#
# [[4]]
# [1] "right"

# On a data frame each column is one element: sum(x) = 23 and sum(y) = 25
# pass the predicate and are replaced by their sum; sum(z) = 5 does not.
res <- map_if(.x = mydata, .p = fun, .f = sum, .else = ~"wrong")
res
# $x
# [1] 23
#
# $y
# [1] 25
#
# $z
# [1] "wrong"

# map_at(): apply .f only to the elements selected by .at; all other elements
# are returned unchanged. map_at() has no .p / .else parameters (they would
# only be forwarded to .f and ignored), so they are not passed here.
res <- map_at(.x = x, .at = 2:3, .f = ~"right")
res
# [[1]]
# [1] 1
#
# [[2]]
# [1] "right"
#
# [[3]]
# [1] "right"
#
# [[4]]
# [1] 9

# With a data frame, .at = 2:3 selects the second and third columns (y, z).
res <- map_at(.x = mydata, .at = 2:3, .f = ~"right")
res
# $x
# [1] 1 5 8 9
#
# $y
# [1] "right"
#
# $z
# [1] "right"
# Draw 10 normal samples for each mean in 1:10 (the piped value fills rnorm's
# `mean` argument because `n` is given by name), then average each sample.
1:10 %>%
  map(rnorm, n = 10) %>%
  map_dbl(mean)

# The same sampling step written with an anonymous function
1:10 %>%
  map(function(x) rnorm(10, x))

# ... and with a formula shortcut
1:10 %>%
  map(~ rnorm(10, .x))

# set_names() on a character vector names each element after itself, so the
# result keeps track of which input produced which output.
set_names(c("foo", "bar")) %>% map_chr(paste0, ":suffix")

# A vector of positions indexes deeply into each element: here the second
# item of the second component (NULL for the empty list).
l2 <- list(
  list(num = 1:3, letters[1:3]),
  list(num = 101:103, letters[4:6]),
  list()
)
l2 %>% map(c(2, 2))

# Split-apply-combine: split mtcars by cylinder count, fit mpg ~ wt to each
# piece, summarise each fit and pull out R^2 as a double vector.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .x)) %>%
  map(summary) %>%
  map_dbl("r.squared")

# When every piece yields a data frame, map_dfr() row-binds the pieces;
# here each fit contributes one row of coefficients.
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .x)) %>%
  map_dfr(~ as.data.frame(t(as.matrix(coef(.)))))
# (to also preserve the variable names, see the broom package)