当前位置: 首页 > 工具软件 > Purrr > 使用案例 >

R语言中的函数5:purrr:map()

赵永逸
2023-12-01

前言

**map()**类函数在很大程度上替代了循环的作用。需要注意,purrr的map()默认是逐个元素顺序执行的,并不会自动使用多线程并行计算(需要并行时可以借助furrr等扩展包);它本身未必比循环更快,但多使用可以让代码更加整洁、不易出错。

map(.x,.f,…),map2(.x,.y,.f,…),pmap(.l,.f,…)(下文用I表示pmap()的第一个参数.l)

  • x,y这里可以是atomic vector(向量或矩阵)或是list(列表,数据框),长度要相等。
  • map(x, f),map2(x,y,f),pmap(I,f)返回的是一个和x有同样长度的list.
  • map2()中.x和.y的长度必须相同(只有长度为1的向量会被自动循环补齐),这点没有python灵活。
  • 在pmap()函数中,I(即参数.l)是一个list,f是个多元函数,每次从list的每个元素中各取一个对象作为参数进行计算。
  • f 是一个函数,或者是一个公式(formula),例如: ~ .x + 2(公式中用.x指代当前元素)。
  • …表示.f所需的参数。

数据准备

library(purrr)
# Sample inputs shared by the map() / map2() / pmap() demos that follow.
x <- c(1, 5, 8, 9)      # atomic (double) vector
X <- list(1, 2, 6, 5)   # list with one number per element
y <- c(8, 4, 6, 2)
z <- c(1, 4, 8, 6)

# Add one to the input.
fun <- function(x) {
  x + 1
}

# Subtract `y` from `x`.
funxy <- function(x, y) {
  x - y
}

map()测试

x是vector的情况:

# Apply `fun` to each element of the atomic vector `x`.
# map() returns a list with one entry per input element.
res <- map(x, fun)
res
# [[1]]
# [1] 2
#
# [[2]]
# [1] 6
#
# [[3]]
# [1] 9
#
# [[4]]
# [1] 10

x包含list的情况:

# Same call with the list `X` as input: `fun` is applied to each list element.
res <- map(X, fun)
res
# [[1]]
# [1] 2
#
# [[2]]
# [1] 3
#
# [[3]]
# [1] 7
#
# [[4]]
# [1] 6

map2()测试

# Walk `x` and `y` in parallel, calling funxy(x[i], y[i]) for each position.
res <- map2(x, y, funxy)
res
# [[1]]
# [1] -7
#
# [[2]]
# [1] 1
#
# [[3]]
# [1] 2
#
# [[4]]
# [1] 7

如果被映射变量长度不同就会报错:

# A length-2 vector cannot be paired with the length-4 `x`, so map2() stops
# with an error. The call and the print stay on one line so that nothing is
# printed after the failure.
y2 <- c(8, 4)
res <- map2(x, y2, funxy); res
# Error: Mapped vectors must have consistent lengths:
#   * `.x` has length 4
#   * `.y` has length 2

list与vector混合的情况:

# A list (`X`) and an atomic vector (`y`) can be mixed as long as their
# lengths agree.
res <- map2(X, y, funxy)
res
# [[1]]
# [1] -7
#
# [[2]]
# [1] -2
#
# [[3]]
# [1] 0
#
# [[4]]
# [1] 3

pmap()测试

# Three-argument function used with pmap(): adds its inputs.
funxyz <- function(x, y, z) {
  x + y + z
}

# The list is unnamed, so its three components are handed to funxyz
# by position, one element from each per call.
I <- list(X, y, z)
res <- pmap(I, funxyz)
res
# [[1]]
# [1] 10
#
# [[2]]
# [1] 10
#
# [[3]]
# [1] 20
#
# [[4]]
# [1] 13

map_lgl(),map2_lgl(),pmap_lgl()

返回逻辑向量

library(purrr)
# Predicates for the *_lgl variants; each returns a single TRUE/FALSE per call.
fun <- function(x) {
  x > 1
}
funxy <- function(x, y) {
  x > y
}
funxyz <- function(x, y, z) {
  x > y - z
}

# Three equal-length inputs, also bundled column-wise into a data frame.
x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x = x, y = y, z = z)

# One input: is each element of x greater than 1?
res <- map_lgl(x, fun)
res
#[1] FALSE  TRUE  TRUE  TRUE

# Two inputs compared position by position.
res <- map2_lgl(x, y, funxy)
res
#[1] FALSE  TRUE  TRUE  TRUE

# The data frame is processed row by row; columns x, y, z are matched to the
# arguments of funxyz by name.
res <- pmap_lgl(mydata, funxyz)
res
# [1] TRUE TRUE TRUE TRUE

map_chr(), map2_chr(), pmap_chr()

返回字符串向量

library(purrr)
# Helpers for the *_chr variants; each returns one string per call.
fun <- function(x) {
  as.character(x)
}
funxy <- function(x, y) {
  paste0(as.character(x), as.character(y))
}
funxyz <- function(x, y, z) {
  paste0(as.character(x), as.character(y), as.character(z))
}

# Three equal-length inputs, also bundled column-wise into a data frame.
x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x = x, y = y, z = z)

# Each number converted to its string form.
res <- map_chr(x, fun)
res
# "1" "5" "8" "9"

# Digits of x and y concatenated pairwise.
res <- map2_chr(x, y, funxy)
res
# "18" "54" "86" "92"

# One string per data-frame row, built from its x, y and z values.
res <- pmap_chr(mydata, funxyz)
res
# "189" "545" "864" "926"

map_dbl(),map2_dbl(),pmap_dbl()

返回的是一个双精度向量

library(purrr)
# Helpers for the *_dbl variants; each returns a single double per call.
fun <- function(x) {
  x + 0.11
}
funxy <- function(x, y) {
  x - y + 0.69
}
funxyz <- function(x, y, z) {
  x - y - z - 0.554
}

# Three equal-length inputs, also bundled column-wise into a data frame.
x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x = x, y = y, z = z)

# The result is a plain double vector rather than a list.
res <- map_dbl(x, fun)
res
# [1] 1.11 5.11 8.11 9.11

res <- map2_dbl(x, y, funxy)
res
# [1] -6.31  1.69  2.69  7.69

# One value per data-frame row.
res <- pmap_dbl(mydata, funxyz)
res
# [1] -16.554  -4.554  -2.554   0.446

map_dfr(), map2_dfr(), pmap_dfr()

返回一个dataframe, 是由每个f(x)输出rbind后的结果,因此也需要f的返回是一个dataframe。

 library(purrr)
# Helpers for the *_dfr variants; each call returns a one-row data frame
# with columns var1 and var2.
fun <- function(x) {
  data.frame(var1 = x + 0.11, var2 = x)
}
funxy <- function(x, y) {
  data.frame(var1 = x - y + 0.69, var2 = x)
}
funxyz <- function(x, y, z) {
  data.frame(var1 = x - y - z - 0.554, var2 = x + y + z)
}

# Three equal-length inputs, also bundled column-wise into a data frame.
x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x = x, y = y, z = z)

# The per-element data frames are stacked row-wise into one result.
res <- map_dfr(x, fun)
res
# var1 var2
# 1 1.11    1
# 2 5.11    5
# 3 8.11    8
# 4 9.11    9

res <- map2_dfr(x, y, funxy)
res
# var1 var2
# 1 -6.31    1
# 2  1.69    5
# 3  2.69    8
# 4  7.69    9

# One output row per input row of mydata.
res <- pmap_dfr(mydata, funxyz)
res
# var1 var2
# 1 -16.554   18
# 2  -4.554   14
# 3  -2.554   18
# 4   0.446   17

map_dfc(), map2_dfc(), pmap_dfc()

返回一个dataframe, 是由每个f(x)输出cbind后的结果,因此也需要f的返回是一个可以被cbind的输出。

library(purrr)
# Helpers for the *_dfc variants. fun and funxyz return one-row data frames;
# funxy returns an unnamed length-2 numeric vector instead.
fun <- function(x) {
  data.frame(var1 = x + 0.11, var2 = x)
}
funxy <- function(x, y) {
  c(x - y + 0.69, x)
}
funxyz <- function(x, y, z) {
  data.frame(var1 = x - y - z - 0.554, var2 = x + y + z)
}

# Three equal-length inputs, also bundled column-wise into a data frame.
x <- c(1, 5, 8, 9)
y <- c(8, 4, 6, 2)
z <- c(9, 5, 4, 6)
mydata <- data.frame(x = x, y = y, z = z)

# The per-element results are placed side by side as columns.
# NOTE(review): the column names shown below are as recorded by the author;
# the automatic de-duplicated names may differ between package versions.
res <- map_dfc(x, fun)
res
# var1 var2 var11 var21 var12 var22 var13 var23
# 1 1.11    1  5.11     5  8.11     8  9.11     9

# Each call yields a length-2 vector, which becomes one column of two rows.
res <- map2_dfc(x, y, funxy)
res
# # A tibble: 2 x 4
# V1    V2    V3    V4
# <dbl> <dbl> <dbl> <dbl>
#   1 -6.31  1.69  2.69  7.69
#   2  1     5     8     9

res <- pmap_dfc(mydata, funxyz)
res
# var1 var2  var11 var21  var12 var22 var13 var23
# 1 -16.554   18 -4.554    14 -2.554    18 0.446    17

map_if(.x,.p,.f,…,.else),map_at(.x,.at,.f,…)

这里.p是判断函数,.f是判断为真时执行的函数,.else是判断为假时执行的函数(.p和.else只属于map_if());map_at()则通过.at指定要处理的元素,.at可以是字符串(名称)或者数值索引,表示只对.x中这些对象应用.f,其余元素保持不变。注意这里的作用单元是list或vector中的每个元素(对数据框来说是每一列),而非横向地从每个元素中取值执行。

library(purrr)
# Predicate: TRUE when the values in `x` sum to more than 8.
fun <- function(x) {
  sum(x) > 8
}
x <- c(1, 5, 8, 9)
y <- c(9, 8, 5, 3)
z <- c(-1, -2, 5, 3)
mydata <- data.frame(x = x, y = y, z = z)

# map_if() on a vector: every element is tested on its own, so only 9
# satisfies sum(x) > 8 (1, 5 and 8 do not).
res <- map_if(.x = x, .p = fun, .f = ~'right', .else = ~'wrong')
res
# [[1]]
# [1] "wrong"
#
# [[2]]
# [1] "wrong"
#
# [[3]]
# [1] "wrong"
#
# [[4]]
# [1] "right"

# map_if() on a data frame: each column is one element. Columns x (sum 23)
# and y (sum 25) pass the predicate and are replaced by their sum; column z
# (sum 5) fails and gets the .else result.
res <- map_if(.x = mydata, .p = fun, .f = sum, .else = ~'wrong')
res
# $x
# [1] 23
#
# $y
# [1] 25
#
# $z
# [1] "wrong"

# map_at() selects elements by position (or name) through .at; it takes no
# predicate and no .else. Elements 2 and 3 are transformed, the others are
# returned unchanged.
res <- map_at(.x = x, .at = 2:3, .f = ~'right')
res
# [[1]]
# [1] 1
#
# [[2]]
# [1] "right"
#
# [[3]]
# [1] "right"
#
# [[4]]
# [1] 9

# On a data frame, .at = 2:3 addresses the second and third columns.
res <- map_at(.x = mydata, .at = 2:3, .f = ~'right')
res
# $x
# [1] 1 5 8 9
#
# $y
# [1] "right"
#
# $z
# [1] "right"

说明文档中的一些例子

# Each value of 1:10 is passed to rnorm() as its first unnamed argument.
# Because n is supplied by name, that value fills `mean`, giving ten samples
# of size 10; map_dbl(mean) then reduces each sample to its average.
1:10 %>%
  map(rnorm, n = 10) %>%
  map_dbl(mean)

# Or use an anonymous function
1:10 %>%
  map(function(x) rnorm(10, x))

# Or a formula
# (.x refers to the current element)
1:10 %>%
  map(~ rnorm(10, .x))



# Using set_names() with character vectors is handy to keep track
# of the original inputs:
# (called without explicit names, set_names() names each element after
# itself, and map_chr() carries those names over to its result)
set_names(c("foo", "bar")) %>% map_chr(paste0, ":suffix")





# Supply multiple values to index deeply into a list
# (c(2, 2) means: take the 2nd element of each inner list, then the 2nd item
# of that element. The empty list() has nothing at that position --
# presumably NULL is returned for it; confirm against your purrr version.)
l2 <- list(
  list(num = 1:3,     letters[1:3]),
  list(num = 101:103, letters[4:6]),
  list()
)
l2 %>% map(c(2, 2))






# A more realistic example: split a data frame into pieces, fit a
# model to each piece, summarise and extract R^2
# (a string passed as the function, here "r.squared", extracts the element
# with that name from each summary)
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .x)) %>%
  map(summary) %>%
  map_dbl("r.squared")





# If each element of the output is a data frame, use
# map_dfr to row-bind them together:
# (inside the formula, `.` stands for the current element, i.e. one fitted
# model; its coefficients are reshaped into a one-row data frame)
mtcars %>%
  split(.$cyl) %>%
  map(~ lm(mpg ~ wt, data = .x)) %>%
  map_dfr(~ as.data.frame(t(as.matrix(coef(.)))))
# (if you also want to preserve the variable names see
# the broom package)
 类似资料: