R 函数化编程学习笔记
2019-07-19
- 使用 RMarkdown 的
child
参数,进行文档拼接。 - 这样拼接以后的笔记方便复习。
- 相关问题提交到 GitHub
1 函数化编程介绍
参考 Ghosh (2019)
“ -a style of building the structure and elements of computer programs - that treats computation as the evaluation of mathematical functions and avoids changing-state and mutable data. It is a declarative programming paradigm, which means programming is done with expressions or declarations instead of statements.”
因此函数化编程的目的是减少参数状态修改和数据修改,类似于数学表达式一般批量完成,而非 SQL 一个个 statement 书写。
# assorted documentation ommitted
# usethis::use_pipe() allows us to use %>%
#' @export
read_page_views <- function(){
here::here("data", "page_views.csv") %>%
readr::read_csv() %>%
# data comes in as UTC
dplyr::mutate(time = lubridate::with_tz(time,
tz = "America/Chicago"),
# some non-standard characters in page names were
# coming out garbled, and needed to be replaced
# for use in plot labels
page_name = regex_stuff(page_name))
}
->
#' @export
read_page_views <- function(){
read_data_file(location = "web_data",
file_name = "page_views.csv") %>%
# data comes in as UTC
dplyr::mutate(time = lubridate::with_tz(time,
tz = "America/Chicago"),
page_name = regex_stuff(page_name))
}
read_data_file
is an internal function that wraps the first two steps.
如两个函数的对比,read_data_file
的产生就是因为被频繁调用而产生的,因此函数化编程就是让重复代码编写变成函数被重复调用,减少代码量。
2 基础
2.1 list 对象的函数化编程
参考 Fournier (2018) ,
dplyr 对于 data.frame
的处理,purrr 可以对 list 处理。
2.2 mutate
c <-
list(a = c("1","2"),
b = c("3","4"))
library(purrr)
c %>%
map(as.numeric)
## $a
## [1] 1 2
##
## $b
## [1] 3 4
类似于
c %>%
mutate_all(as.numeric)
2.3 select
library(repurrrsive)
sw_films %>%
head(1) %>%
str
## List of 1
## $ :List of 14
## ..$ title : chr "A New Hope"
## ..$ episode_id : int 4
## ..$ opening_crawl: chr "It is a period of civil war.\r\nRebel spaceships, striking\r\nfrom a hidden base, have won\r\ntheir first victo"| __truncated__
## ..$ director : chr "George Lucas"
## ..$ producer : chr "Gary Kurtz, Rick McCallum"
## ..$ release_date : chr "1977-05-25"
## ..$ characters : chr [1:18] "http://swapi.co/api/people/1/" "http://swapi.co/api/people/2/" "http://swapi.co/api/people/3/" "http://swapi.co/api/people/4/" ...
## ..$ planets : chr [1:3] "http://swapi.co/api/planets/2/" "http://swapi.co/api/planets/3/" "http://swapi.co/api/planets/1/"
## ..$ starships : chr [1:8] "http://swapi.co/api/starships/2/" "http://swapi.co/api/starships/3/" "http://swapi.co/api/starships/5/" "http://swapi.co/api/starships/9/" ...
## ..$ vehicles : chr [1:4] "http://swapi.co/api/vehicles/4/" "http://swapi.co/api/vehicles/6/" "http://swapi.co/api/vehicles/7/" "http://swapi.co/api/vehicles/8/"
## ..$ species : chr [1:5] "http://swapi.co/api/species/5/" "http://swapi.co/api/species/3/" "http://swapi.co/api/species/2/" "http://swapi.co/api/species/1/" ...
## ..$ created : chr "2014-12-10T14:23:31.880000Z"
## ..$ edited : chr "2015-04-11T09:46:52.774897Z"
## ..$ url : chr "http://swapi.co/api/films/1/"
{1:length(sw_films)} %>%
map_chr(~sw_films[[.]]$title)
## [1] "A New Hope" "Attack of the Clones"
## [3] "The Phantom Menace" "Revenge of the Sith"
## [5] "Return of the Jedi" "The Empire Strikes Back"
## [7] "The Force Awakens"
sw_films %>%
map_chr("title")
## [1] "A New Hope" "Attack of the Clones"
## [3] "The Phantom Menace" "Revenge of the Sith"
## [5] "Return of the Jedi" "The Empire Strikes Back"
## [7] "The Force Awakens"
类似于
sw_films %>%
select(title)
2.3.1 select + mutate
参考 DataCamp
suppressMessages(library(tidyverse))
library(repurrrsive)
# Load sw_people data
data(sw_people)
sw_people %>% head(2) %>% str
## List of 2
## $ :List of 16
## ..$ name : chr "Luke Skywalker"
## ..$ height : chr "172"
## ..$ mass : chr "77"
## ..$ hair_color: chr "blond"
## ..$ skin_color: chr "fair"
## ..$ eye_color : chr "blue"
## ..$ birth_year: chr "19BBY"
## ..$ gender : chr "male"
## ..$ homeworld : chr "http://swapi.co/api/planets/1/"
## ..$ films : chr [1:5] "http://swapi.co/api/films/6/" "http://swapi.co/api/films/3/" "http://swapi.co/api/films/2/" "http://swapi.co/api/films/1/" ...
## ..$ species : chr "http://swapi.co/api/species/1/"
## ..$ vehicles : chr [1:2] "http://swapi.co/api/vehicles/14/" "http://swapi.co/api/vehicles/30/"
## ..$ starships : chr [1:2] "http://swapi.co/api/starships/12/" "http://swapi.co/api/starships/22/"
## ..$ created : chr "2014-12-09T13:50:51.644000Z"
## ..$ edited : chr "2014-12-20T21:17:56.891000Z"
## ..$ url : chr "http://swapi.co/api/people/1/"
## $ :List of 14
## ..$ name : chr "C-3PO"
## ..$ height : chr "167"
## ..$ mass : chr "75"
## ..$ hair_color: chr "n/a"
## ..$ skin_color: chr "gold"
## ..$ eye_color : chr "yellow"
## ..$ birth_year: chr "112BBY"
## ..$ gender : chr "n/a"
## ..$ homeworld : chr "http://swapi.co/api/planets/1/"
## ..$ films : chr [1:6] "http://swapi.co/api/films/5/" "http://swapi.co/api/films/4/" "http://swapi.co/api/films/6/" "http://swapi.co/api/films/3/" ...
## ..$ species : chr "http://swapi.co/api/species/2/"
## ..$ created : chr "2014-12-10T15:10:51.357000Z"
## ..$ edited : chr "2014-12-20T21:17:50.309000Z"
## ..$ url : chr "http://swapi.co/api/people/2/"
# Map over sw_people and pull out the height element
height_cm <- map(sw_people, 'height') %>%
map(function(x){
ifelse(x == "unknown",NA,
as.numeric(x))
})
2.4 rename
参考 Fournier (2018, chap. 2)
# Set names so each element of the list is named for the film title
sw_films_named <- sw_films %>%
set_names(map_chr(sw_films, "title"))
# Check to see if the names worked/are correct
names(sw_films_named)
## [1] "A New Hope" "Attack of the Clones"
## [3] "The Phantom Menace" "Revenge of the Sith"
## [5] "Return of the Jedi" "The Empire Strikes Back"
## [7] "The Force Awakens"
2.5 arrange
参考 DataCamp
suppressMessages(library(tidyverse))
library(repurrrsive)
data(gh_users)
gh_users[1:2] %>%
str
## List of 2
## $ :List of 30
## ..$ login : chr "gaborcsardi"
## ..$ id : int 660288
## ..$ avatar_url : chr "https://avatars.githubusercontent.com/u/660288?v=3"
## ..$ gravatar_id : chr ""
## ..$ url : chr "https://api.github.com/users/gaborcsardi"
## ..$ html_url : chr "https://github.com/gaborcsardi"
## ..$ followers_url : chr "https://api.github.com/users/gaborcsardi/followers"
## ..$ following_url : chr "https://api.github.com/users/gaborcsardi/following{/other_user}"
## ..$ gists_url : chr "https://api.github.com/users/gaborcsardi/gists{/gist_id}"
## ..$ starred_url : chr "https://api.github.com/users/gaborcsardi/starred{/owner}{/repo}"
## ..$ subscriptions_url : chr "https://api.github.com/users/gaborcsardi/subscriptions"
## ..$ organizations_url : chr "https://api.github.com/users/gaborcsardi/orgs"
## ..$ repos_url : chr "https://api.github.com/users/gaborcsardi/repos"
## ..$ events_url : chr "https://api.github.com/users/gaborcsardi/events{/privacy}"
## ..$ received_events_url: chr "https://api.github.com/users/gaborcsardi/received_events"
## ..$ type : chr "User"
## ..$ site_admin : logi FALSE
## ..$ name : chr "Gábor Csárdi"
## ..$ company : chr "Mango Solutions, @MangoTheCat "
## ..$ blog : chr "http://gaborcsardi.org"
## ..$ location : chr "Chippenham, UK"
## ..$ email : chr "csardi.gabor@gmail.com"
## ..$ hireable : NULL
## ..$ bio : NULL
## ..$ public_repos : int 52
## ..$ public_gists : int 6
## ..$ followers : int 303
## ..$ following : int 22
## ..$ created_at : chr "2011-03-09T17:29:25Z"
## ..$ updated_at : chr "2016-10-11T11:05:06Z"
## $ :List of 30
## ..$ login : chr "jennybc"
## ..$ id : int 599454
## ..$ avatar_url : chr "https://avatars.githubusercontent.com/u/599454?v=3"
## ..$ gravatar_id : chr ""
## ..$ url : chr "https://api.github.com/users/jennybc"
## ..$ html_url : chr "https://github.com/jennybc"
## ..$ followers_url : chr "https://api.github.com/users/jennybc/followers"
## ..$ following_url : chr "https://api.github.com/users/jennybc/following{/other_user}"
## ..$ gists_url : chr "https://api.github.com/users/jennybc/gists{/gist_id}"
## ..$ starred_url : chr "https://api.github.com/users/jennybc/starred{/owner}{/repo}"
## ..$ subscriptions_url : chr "https://api.github.com/users/jennybc/subscriptions"
## ..$ organizations_url : chr "https://api.github.com/users/jennybc/orgs"
## ..$ repos_url : chr "https://api.github.com/users/jennybc/repos"
## ..$ events_url : chr "https://api.github.com/users/jennybc/events{/privacy}"
## ..$ received_events_url: chr "https://api.github.com/users/jennybc/received_events"
## ..$ type : chr "User"
## ..$ site_admin : logi FALSE
## ..$ name : chr "Jennifer (Jenny) Bryan"
## ..$ company : chr "University of British Columbia"
## ..$ blog : chr "https://twitter.com/JennyBryan"
## ..$ location : chr "Vancouver, BC, Canada"
## ..$ email : NULL
## ..$ hireable : NULL
## ..$ bio : chr "prof at UBC, humane #rstats, statistics, teach @STAT545-UBC, leadership of @rOpenSci, @rsheets, academic director @ubc-mds"
## ..$ public_repos : int 168
## ..$ public_gists : int 54
## ..$ followers : int 780
## ..$ following : int 34
## ..$ created_at : chr "2011-02-03T22:37:41Z"
## ..$ updated_at : chr "2016-10-24T07:20:26Z"
# Determine who has the most public repositories
map_int(gh_users, 'public_repos') %>%
set_names(map_chr(gh_users, 'name')) %>%
sort()
## Julia Silge Ma<U+00EB>lle Salmon Gábor Csárdi
## 26 31 52
## Jeff L. Thomas J. Leeper Jennifer (Jenny) Bryan
## 67 99 168
2.6 list 对象的结构
参考 Fournier (2018)
library(repurrrsive)
suppressMessages(library(tidyverse))
sw_films %>%
map(names)
## [[1]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[2]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[3]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[4]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[5]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[6]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "vehicles" "species" "created"
## [13] "edited" "url"
##
## [[7]]
## [1] "title" "episode_id" "opening_crawl" "director"
## [5] "producer" "release_date" "characters" "planets"
## [9] "starships" "species" "created" "edited"
## [13] "url"
- 方便
select
- 比 str 展示的信息少,且更有用
2.7 walk 函数批量作图
参考 Fournier (2018)
Printing out lists with map() shows a lot of bracketed text in the console, which can be useful for understanding their structure, but this information is usually not important for communicating with your end users. If you need to print, using walk() prints out lists in a more compact and human-readable way, without all those brackets. walk() is also great for printing out plots without printing anything to the console.
“a lot of bracketed text” 的显示,才是 walk 产生的意义。
suppressMessages(library(tidyverse))
library(repurrrsive)
# Load the gap_split data
data(gap_split)
# Map over the first 10 elements of gap_split
plots <- map2(gap_split[1:3],
names(gap_split[1:3]),
~ ggplot(.x, aes(year, lifeExp)) +
geom_line() +
labs(title = .y))
# Object name, then function name
walk(plots, print)
2.8 建立 list of dfs 对象
参考 Fournier (2018)
suppressMessages(library(tidyverse))
library(repurrrsive)
data(gh_users)
# Create a dataframe with four columns
map_df(gh_users, `[`,
c('login','name','followers','public_repos')) %>%
# Plot followers by public_repos
ggplot(.,
aes(x = followers, y = public_repos)) +
# Create scatter plots
geom_point()
`[`, c('login','name','followers','public_repos')
是构建 data.frame
的 list
的普遍方法。
2.9 处理 list of lists 对象
参考 Fournier (2018)
suppressMessages(library(tidyverse))
library(repurrrsive)
# Map over gh_repos to generate numeric output
map(gh_repos,
~map_dbl(.x,
~.x$size)) %>%
# Grab the largest element
map(~max(.x)) %>%
map_dbl(~.[1])
## [1] 39461 96325 374812 24070 558176 76455
gh_repos[[1]][[1]] %>% str
## List of 68
## $ id : int 61160198
## $ name : chr "after"
## $ full_name : chr "gaborcsardi/after"
## $ owner :List of 17
## ..$ login : chr "gaborcsardi"
## ..$ id : int 660288
## ..$ avatar_url : chr "https://avatars.githubusercontent.com/u/660288?v=3"
## ..$ gravatar_id : chr ""
## ..$ url : chr "https://api.github.com/users/gaborcsardi"
## ..$ html_url : chr "https://github.com/gaborcsardi"
## ..$ followers_url : chr "https://api.github.com/users/gaborcsardi/followers"
## ..$ following_url : chr "https://api.github.com/users/gaborcsardi/following{/other_user}"
## ..$ gists_url : chr "https://api.github.com/users/gaborcsardi/gists{/gist_id}"
## ..$ starred_url : chr "https://api.github.com/users/gaborcsardi/starred{/owner}{/repo}"
## ..$ subscriptions_url : chr "https://api.github.com/users/gaborcsardi/subscriptions"
## ..$ organizations_url : chr "https://api.github.com/users/gaborcsardi/orgs"
## ..$ repos_url : chr "https://api.github.com/users/gaborcsardi/repos"
## ..$ events_url : chr "https://api.github.com/users/gaborcsardi/events{/privacy}"
## ..$ received_events_url: chr "https://api.github.com/users/gaborcsardi/received_events"
## ..$ type : chr "User"
## ..$ site_admin : logi FALSE
## $ private : logi FALSE
## $ html_url : chr "https://github.com/gaborcsardi/after"
## $ description : chr "Run Code in the Background"
## $ fork : logi FALSE
## $ url : chr "https://api.github.com/repos/gaborcsardi/after"
## $ forks_url : chr "https://api.github.com/repos/gaborcsardi/after/forks"
## $ keys_url : chr "https://api.github.com/repos/gaborcsardi/after/keys{/key_id}"
## $ collaborators_url: chr "https://api.github.com/repos/gaborcsardi/after/collaborators{/collaborator}"
## $ teams_url : chr "https://api.github.com/repos/gaborcsardi/after/teams"
## $ hooks_url : chr "https://api.github.com/repos/gaborcsardi/after/hooks"
## $ issue_events_url : chr "https://api.github.com/repos/gaborcsardi/after/issues/events{/number}"
## $ events_url : chr "https://api.github.com/repos/gaborcsardi/after/events"
## $ assignees_url : chr "https://api.github.com/repos/gaborcsardi/after/assignees{/user}"
## $ branches_url : chr "https://api.github.com/repos/gaborcsardi/after/branches{/branch}"
## $ tags_url : chr "https://api.github.com/repos/gaborcsardi/after/tags"
## $ blobs_url : chr "https://api.github.com/repos/gaborcsardi/after/git/blobs{/sha}"
## $ git_tags_url : chr "https://api.github.com/repos/gaborcsardi/after/git/tags{/sha}"
## $ git_refs_url : chr "https://api.github.com/repos/gaborcsardi/after/git/refs{/sha}"
## $ trees_url : chr "https://api.github.com/repos/gaborcsardi/after/git/trees{/sha}"
## $ statuses_url : chr "https://api.github.com/repos/gaborcsardi/after/statuses/{sha}"
## $ languages_url : chr "https://api.github.com/repos/gaborcsardi/after/languages"
## $ stargazers_url : chr "https://api.github.com/repos/gaborcsardi/after/stargazers"
## $ contributors_url : chr "https://api.github.com/repos/gaborcsardi/after/contributors"
## $ subscribers_url : chr "https://api.github.com/repos/gaborcsardi/after/subscribers"
## $ subscription_url : chr "https://api.github.com/repos/gaborcsardi/after/subscription"
## $ commits_url : chr "https://api.github.com/repos/gaborcsardi/after/commits{/sha}"
## $ git_commits_url : chr "https://api.github.com/repos/gaborcsardi/after/git/commits{/sha}"
## $ comments_url : chr "https://api.github.com/repos/gaborcsardi/after/comments{/number}"
## $ issue_comment_url: chr "https://api.github.com/repos/gaborcsardi/after/issues/comments{/number}"
## $ contents_url : chr "https://api.github.com/repos/gaborcsardi/after/contents/{+path}"
## $ compare_url : chr "https://api.github.com/repos/gaborcsardi/after/compare/{base}...{head}"
## $ merges_url : chr "https://api.github.com/repos/gaborcsardi/after/merges"
## $ archive_url : chr "https://api.github.com/repos/gaborcsardi/after/{archive_format}{/ref}"
## $ downloads_url : chr "https://api.github.com/repos/gaborcsardi/after/downloads"
## $ issues_url : chr "https://api.github.com/repos/gaborcsardi/after/issues{/number}"
## $ pulls_url : chr "https://api.github.com/repos/gaborcsardi/after/pulls{/number}"
## $ milestones_url : chr "https://api.github.com/repos/gaborcsardi/after/milestones{/number}"
## $ notifications_url: chr "https://api.github.com/repos/gaborcsardi/after/notifications{?since,all,participating}"
## $ labels_url : chr "https://api.github.com/repos/gaborcsardi/after/labels{/name}"
## $ releases_url : chr "https://api.github.com/repos/gaborcsardi/after/releases{/id}"
## $ deployments_url : chr "https://api.github.com/repos/gaborcsardi/after/deployments"
## $ created_at : chr "2016-06-14T22:33:49Z"
## $ updated_at : chr "2016-07-21T17:42:35Z"
## $ pushed_at : chr "2016-07-09T16:13:42Z"
## $ git_url : chr "git://github.com/gaborcsardi/after.git"
## $ ssh_url : chr "git@github.com:gaborcsardi/after.git"
## $ clone_url : chr "https://github.com/gaborcsardi/after.git"
## $ svn_url : chr "https://github.com/gaborcsardi/after"
## $ homepage : NULL
## $ size : int 15
## $ stargazers_count : int 5
## $ watchers_count : int 5
## $ language : chr "R"
## $ has_issues : logi TRUE
## $ has_downloads : logi TRUE
## $ has_wiki : logi TRUE
## $ has_pages : logi FALSE
## $ forks_count : int 0
## $ mirror_url : NULL
## $ open_issues_count: int 0
## $ forks : int 0
## $ open_issues : int 0
## $ watchers : int 5
## $ default_branch : chr "master"
gh_repos %>%
map_chr(~.x[[1]][['owner']][['login']])
## [1] "gaborcsardi" "jennybc" "jtleek" "juliasilge" "leeper"
## [6] "masalmon"
(size_tmp <-
gh_repos %>%
map(
~map_dbl(
.x,~.x$size
)
))
## [[1]]
## [1] 15 115 199 112 2 180 6244 3704 24 752 998
## [12] 1428 216 13 463 30 8 58 26 39461 34 407
## [23] 120 140 26328 734 58 312 35 4584
##
## [[2]]
## [1] 8701 16662 33968 9470 3979 3163 16793 13661 0 121 144
## [12] 7584 377 456 14207 1039 540 204 204 0 9880 96325
## [23] 3529 4617 12282 9286 3615 90 10606 31
##
## [[3]]
## [1] 200992 188 1789 4682 10421 374812 128 156389 98 547
## [11] 112 388 367 50000 91008 45 147 197 15870 41702
## [21] 116 1199 1067 447 6248 27878 185443 381 23206 6045
##
## [[4]]
## [1] 1127 8254 972 4 672 332 24070 6878 156 3485 19953
## [12] 11337 152 136 11066 2072 2 20 7 103 260 962
## [23] 5 12355 12953 508
##
## [[5]]
## [1] 8052 108 132 32 70 77 48 7 66 0
## [11] 558176 124 15 14 152 24898 95283 3321 261 12
## [21] 456 402 1 2139 0 124 22 1490 17 500
##
## [[6]]
## [1] 1993 4435 719 379 18 631 8436 4884 76455 7952 89
## [12] 689 6426 146 140 8088 45 4757 68 1948 305 19430
## [23] 9803 6599 2429 8265 4351 1784 343 75
size_tmp %>%
map_dbl(max)
## [1] 39461 96325 374812 24070 558176 76455
这个例子比较难。
需要知道变量size
是在 list of list 中的,因此需要嵌套。
在这种情况下,已经感觉 purrr 函数已经不够了。
3 中级
3.1 map + 匿名函数
.x, .y, .z
只适用于参数少的情况下,更多的情况下,使用
..1, ..2, ..3, ..4
suppressMessages(library(tidyverse))
data_frame(
a = 1:10
,b = 1
,c = 1
,d = 1
) %>%
mutate(
sum = pmap_dbl(list(a,b,c,d),~..1+..2+..3+..4)
)
3.2 wrap 函数
参考 FAY (2019)
round_mean <- as_mapper(~ round(mean(.x)))
round_mean
## <lambda>
## function (..., .x = ..1, .y = ..2, . = ..1)
## round(mean(.x))
## attr(,"class")
## [1] "rlang_lambda_function" "function"
round_mean(1:10)
## [1] 6
3.3 every 和 some 条件
参考 FAY (2019)
every(): does every element satisfy a condition?
some(): do some elements satisfy a condition?
mtcars %>% head
mtcars %>% every(~ is.double(.))
## [1] TRUE
mtcars %>% some(~ !is.double(.))
## [1] FALSE
args(every)
## function (.x, .p, ...)
## NULL
args(some)
## function (.x, .p, ...)
## NULL
其中第二个参数叫做 predicates,一般反馈 T 和 F 的,是有特殊定义的。 参考 DataCamp
3.4 keep 和 discard
参考 FAY (2019)
keep()
: extract elements that satisfy a condition
discard()
: remove elements that satisfy a condition
类似于 dplyr 的 select 函数,但是这里主要是针对 list 函数进行的。
suppressMessages(library(tidyverse))
mtcars %>%
select_if(~sd(.) > 5)
mtcars %>%
keep(~sd(.) > 5)
mtcars %>%
discard(~sd(.) <= 5)
# Which is the first element
mtcars %>%
detect_index(~sd(.) > 5)
## [1] 1
# Which is the last element
mtcars %>%
detect_index(~sd(.) > 5, .right = TRUE)
## [1] 4
3.5 横向计算
suppressMessages(library(tidyverse))
data_frame(
a = 1:10
,b = 1:10
) %>%
mutate(
sum = map2_int(a,b,sum)
)
rowrise 相关更多参考 github
3.6 主要管道的类型
参考 FAY (2019)
To understand computations in R, two slogans are helpful:
- Everything that exists is an object.
- Everything that happens is a function call.
-— John Chambers
library(magrittr)
class(`+`)
## [1] "function"
class(`<-`)
## [1] "function"
class(`%>%`)
## [1] "function"
class(`$`)
## [1] "function"
3.7 compact 函数
参考 FAY (2019)
compact()
removes the NULL
:
无论是输入和输出,NULL
都比较难处理。
suppressMessages(library(tidyverse))
list(1, NULL, 3, 4, NULL)
## [[1]]
## [1] 1
##
## [[2]]
## NULL
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
##
## [[5]]
## NULL
list(1, NULL, 3, 4, NULL) %>% compact()
## [[1]]
## [1] 1
##
## [[2]]
## [1] 3
##
## [[3]]
## [1] 4
3.8 对函数进行编程
参考 FAY (2019)
nop_na <- function(fun){
function(...){
fun(..., na.rm = TRUE)
}
}
sd_no_na <- nop_na(sd)
sd_no_na( c(NA, 1, 2, NA) )
## [1] 0.7071068
3.9 safely 处理有效网址
参考 FAY (2019)
safely 函数的一个实际需求就是探查一组网址是否有效。 因为这项工作一个 for 循环中,报错会很多,因此 safely 函数非常适合。
suppressMessages(library(tidyverse))
urls <- list("https://thinkr.fr","https://colinfay.me","http://not_working.org","https://datacamp.com","http://cran.r-project.org/","https://not_working_either.org")
res <- map(urls, safely(read_lines))
named_res <- set_names(res, urls)
map(named_res, 'error')
## $`https://thinkr.fr`
## NULL
##
## $`https://colinfay.me`
## NULL
##
## $`http://not_working.org`
## <simpleError in open.connection(con, "rb"): Could not resolve host: not_working.org>
##
## $`https://datacamp.com`
## NULL
##
## $`http://cran.r-project.org/`
## NULL
##
## $`https://not_working_either.org`
## <simpleError in open.connection(con, "rb"): Timeout was reached: Connection timed out after 10015 milliseconds>
其中 keep 和 discard 函数可以嵌套。
safe_read_discard <- function(url){
safe_read <- safely(read_lines)
safe_read(url) %>%
discard(~!is.null(.))
}
urls %>%
# Map a version of read_lines() that otherwise returns 404
map( possibly(read_lines, otherwise = '404') ) %>%
# Set the names of the result
set_names( urls ) %>%
# paste() and collapse each element
map(paste, collapse =" ") %>%
# Remove the 404
discard(~.x == '404') %>%
names()
## [1] "https://thinkr.fr" "https://colinfay.me"
## [3] "https://datacamp.com" "http://cran.r-project.org/"
urls %>%
map( safely(read_lines) ) %>%
# Remove the 404
map('error') %>%
set_names( urls ) %>%
keep(is.null) %>%
names()
## [1] "https://thinkr.fr" "https://colinfay.me"
## [3] "https://datacamp.com" "http://cran.r-project.org/"
url_tester <- function(url_list, type = c("result", "error")){
res <- url_list %>%
# Create a safely() version of read_lines()
map( safely(read_lines) ) %>%
set_names( url_list ) %>%
# Transpose into a list of $result and $error
transpose()
# Complete this if statement
if (type == 'result') return( res$result )
if (type == 'error') return( res$error )
}
# Try this function on the urls object
url_tester(urls, type = "error")
## $`https://thinkr.fr`
## NULL
##
## $`https://colinfay.me`
## NULL
##
## $`http://not_working.org`
## <simpleError in open.connection(con, "rb"): Could not resolve host: not_working.org>
##
## $`https://datacamp.com`
## NULL
##
## $`http://cran.r-project.org/`
## NULL
##
## $`https://not_working_either.org`
## <simpleError in open.connection(con, "rb"): Timeout was reached: Connection timed out after 10000 milliseconds>
终于理解为什么 set_names
需要是 purrr 重新设计了,因为需要给 list 命名。
DataCamp
library(httr)
url_tester <- function(url_list){
url_list %>%
# Map a version of GET() that would otherwise return NULL
map( possibly(GET, NULL) ) %>%
# Set the names of the result
set_names( urls ) %>%
# Remove the NULL
discard (is.null) %>%
# Extract all the "status_code" elements
map('status_code')
}
# Try this function on the urls object
url_tester(urls)
## $`https://thinkr.fr`
## [1] 200
##
## $`https://colinfay.me`
## [1] 200
##
## $`https://datacamp.com`
## [1] 200
##
## $`http://cran.r-project.org/`
## [1] 200
这样就可以探查那些网站是 200 可读取的。
正常网页反馈Status: 200
,但是不正常的最后是 timeout。
safely(GET)("http://not_working.org")
## $result
## NULL
##
## $error
## <simpleError in curl::curl_fetch_memory(url, handle = handle): Could not resolve host: not_working.org>
3.10 side effect
参考 FAY (2019)
Sys.time() is an extremely impure function, as it will return a different output depending on when you are running it, so is ls(), which depends on what is in your environment. nrow() is pure, as the output only depends on the object you’re using as an input, and it has no side effect. Other examples include read.csv(), which depends on an external source, and if the file changes, the output will change, or plot(), which is by definiton called for its side-effects.
side-effects
指的是当 input 的类型改变,会输出不一样的结果,这种算 side-effects。
当然 plot 是一个 class 函数,这也正常。
同时,pure function 支输出一种类型的结果。
Expect nothing in return, You actually get
.x
invisibly back, good for piping Happy R Users Purrr Charlotte WickhamFor functions called for their side effects:
- printing to screen
- plotting to graphics device
- file manipulation (saving, writing, moving etc.)
- system calls
我认为这个解释最好、举例最清楚。
数据目前下载不了,已提交Github Issue 8
plots <- map(per_day, ~ ggplot(.x, aes(trmt_date, count)) + geom_line())
plots[[1]] # try: walk(plots, print)
这是批量画图的一种思考。
3.11 管道函数
参考 FAY (2019, chap. 3)
Clean code is:
- Light
- Readable
- Interpretable
- Maintainable
轻量的、可读的、可解释的、可维护的。
compose
可以组合函数,不需要使用 %>%
,写成一行,这样更加 readable。
When you use
compose()
, the functions are passed from right to left DataCamp
library(broom)
library(purrr)
clean_aov <- compose(tidy, anova, lm)
result1 <- clean_aov(Sepal.Length ~ Sepal.Width, data = iris)
result2 <- lm(Sepal.Length ~ Sepal.Width, data = iris) %>%
anova %>%
tidy
identical(result1,result2)
## [1] TRUE
result1
处理之前,遗留的 status code 问题,见 ch2-safely-urls
# Launch purrr and httr
library(purrr)
library(httr)
# Compose a status extractor
status_extract <- compose(status_code, GET)
# Try with "https://thinkr.fr" & "http://datacamp.com"
status_extract("https://thinkr.fr")
## [1] 200
status_extract("http://datacamp.com")
## [1] 200
3.12 反逻辑函数
参考 FAY (2019)
Flip the logical Flip 这个词用得好
在学习 pipeline 时,学习到过 jiaxiangbu
is_not_na <- negate(is.na)
x <- c(1,2,3,4, NA)
is.na(x)
## [1] FALSE FALSE FALSE FALSE TRUE
is_not_na(x)
## [1] TRUE TRUE TRUE TRUE FALSE
3.13 嵌套表格
参考 FAY (2019)
library(tidyverse)
df <- tibble(
classic = c("a", "b","c"),
list = list(
c("a", "b","c"),
c("a", "b","c", "d"),
c("a", "b","c", "d", "e")
)
)
df
3.14 rstudioconf 数据 EDA
参考 FAY (2019)
目的是了解这个数据集,学习这种数据集的分析。
suppressMessages(library(tidyverse))
rstudioconf <- read_rds("datasets/rstudioconf.rds")
class(rstudioconf)
## [1] "tbl_df" "tbl" "data.frame"
rt <-
rstudioconf %>%
filter(is_retweet) %>%
distinct(user_id) %>%
mutate(user_id = as.integer(user_id)) %>%
.$user_id
non_rt <-
rstudioconf %>%
filter(!is_retweet) %>%
mutate(user_id = as.integer(user_id)) %>%
distinct(user_id) %>%
.$user_id
(ratio <- setdiff(rt,non_rt) %>% length / union(rt,non_rt) %>% length)
## [1] 0.7480315
library(scales)
转发的人占总人数的74.8%,因此满足 Pareto’s law,参考 DataCamp。
数据集合不对,提交了 Github Issue 1。 我发现这个数据集合是已经清晰好了。
# Create six_most, with tail(), sort(), and table()
six_most <- compose(tail, sort, table)
# Run extractor() on rstudioconf
rstudioconf$mentions_screen_name %>%
as_vector() %>%
na.omit() %>%
six_most()
## .
## JennyBryan hadleywickham AmeliaMN juliasilge drob
## 278 308 362 376 418
## rstudio
## 648
可以查看数据集中最容易被提到的用户。
# Create mean_above, a mapper that tests if .x is over 3.3
mean_above <- as_mapper(~ .x > 3.3)
# Prefil map_at() with "retweet_count", mean_above for above,
# and mean_above negation for below
above <- partial(map_at, .at = "retweet_count", .f = mean_above )
below <- partial(map_at, .at = "retweet_count", .f = negate(mean_above) )
# Map above() and below() on non_rt, keep the "retweet_count"
ab <- map(non_rt, above) %>% keep("retweet_count")
bl <- map(non_rt, below) %>% keep("retweet_count")
# Compare the size of both elements
length(ab)
length(bl)
学习 partial
和 map_at
这种写法,数据集合不支持,因此没有办法练习。
3.15 flatten 函数
参考 FAY (2019)
数据参考 github
suppressMessages(library(tidyverse))
my_list <- list(
list(a = 1),
list(b = 2)
)
flatten(my_list)
## $a
## [1] 1
##
## $b
## [1] 2
my_list
## [[1]]
## [[1]]$a
## [1] 1
##
##
## [[2]]
## [[2]]$b
## [1] 2
3.16 map_at 函数
参考 FAY (2019)
suppressMessages(library(tidyverse))
my_list <- list(
a = 1:10,
b = 1:100,
c = 12
)
map_at(.x = my_list, .at = "b", .f = sum)
## $a
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $b
## [1] 5050
##
## $c
## [1] 12
4 书写函数
4.1 函数解剖
参考 Wickham and Wickham (2016)
add <- function(x, y = 1) {
x + y
}
formals(add)
## $x
##
##
## $y
## [1] 1
body(add)
## {
## x + y
## }
environment(add)
## <environment: R_GlobalEnv>
4.2 return
- The last expression evaluated in a function is the return value.
return(value)
forces the function to stop execution and return valueReturn value is the last executed expression, or the first executed
return()
statement. (Wickham and Wickham 2016, chap. 1)
这才是 return 真正的作用,强制停止执行。
4.3 f example
直接以f
举例,非常有经济背景的感觉。
f <- function(x) {
if (x < 0) {
-x
} else {
x
}
}
f(1)
## [1] 1
f(-1)
## [1] 1
4.4 函数是对象,可以复制
mean2 <- mean
mean2(1:10)
## [1] 5.5
4.5 函数是对象
function(x) {x+1}
## function(x) {x+1}
(function(x) {x+1})(2)
## [1] 3
4.6 A safer way to create the sequence
参考 Wickham and Wickham (2016)
参考DataCamp
df <- data.frame()
1:ncol(df)
## [1] 1 0
seq_along(df)
## integer(0)
# for (i in 1:ncol(df)) {
# print(median(df[[i]]))
# }
for (i in seq_along(df)) {
print(median(df[[i]]))
}
4.7 Scoping in R
参考 Wickham and Wickham (2016)
x <- 10
f <- function() {
x <- 1
y <- 2
c(x,y)
}
f()
## [1] 1 2
x <- 2
g <- function() {
y <- 1
c(x,y)
}
g()
## [1] 2 1
If a name isn’t defined inside a function, R will look one level up. (Wickham and Wickham 2016, chap. 1)
4.8 Write Good functions
参考 Wickham and Wickham (2016)
4.9 Function names
- Should generally be verbs
- Should be descriptive
# Good
> impute_missing()
# Bad
> imputed()
todo 我之前看到过一个文章,加进来
# Good
> collapse_years()
# Bad
> f()
> my_awesome_function()
remove_last <- function(x) {
if (length(x) <= 1) return(NULL)
x[-length(x)]
}
这个 return(NULL)
非常好,很好的判断条件。
remove_last(data.frame())
## NULL
4.10 Argument names
- Should generally be nouns
- Use the very common short names when appropriate:
- x, y, z: vectors
- df: a data frame
- i, j: numeric indices (typically rows and columns)
- n: length, or number of rows
- p: number of columns
4.11 Argument order
Arguments are often one of two types:
- Data arguments supply the data to compute on.
- Detail arguments control the details of how the computation is done.
Generally, data arguments should come first. Detail arguments should go on the end, and usually should have default values. 参考DataCamp
例如
# Alter the arguments to mean_ci
mean_ci <- function(x,level = 0.95) {
se <- sd(x) / sqrt(length(x))
alpha <- 1 - level
mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}
4.12 Passing functions as arguments
参考 Wickham and Wickham (2016)
这是我对 purrr 理解最深入的一次。
对以下函数,fun
可以放入任意的函数,因此这里函数作为了一个参数。
col_summary <- function(df, fun) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[i] <- fun(df[[i]])
}
output
}
col_summary(mtcars,mean)
## [1] 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250
## [7] 17.848750 0.437500 0.406250 3.687500 2.812500
sapply(mtcars,mean)
## mpg cyl disp hp drat wt
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250
## qsec vs am gear carb
## 17.848750 0.437500 0.406250 3.687500 2.812500
purrr::map_dbl(mtcars,mean)
## mpg cyl disp hp drat wt
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250
## qsec vs am gear carb
## 17.848750 0.437500 0.406250 3.687500 2.812500
map_dbl
returns a double vector,注意不是 list。
4.13 purrr 中自定义函数
参考 Wickham and Wickham (2016)
library(purrr)
map(mtcars, function(x) sum(is.na(x))) %>% unlist()
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
map_dbl(mtcars, function(x) sum(is.na(x)))
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
map(mtcars, ~ sum(is.na(.))) %>% unlist()
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
map_dbl(mtcars, ~ sum(is.na(.)))
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
summarise_all
的替代方式。
4.14 批量提取结果和报错信息
参考 Wickham and Wickham (2016)
urls <- list(
example = "http://example.org",
rproj = "http://www.r-project.org",
asdf = "http://asdfasdasdkfjlda"
)
suppressMessages(library(tidyverse))
# Define safe_readLines()
safe_readLines <- safely(readLines)
# Use the safe_readLines() function with map(): html
html <- map(urls, safe_readLines)
# Call str() on html
str(html)
## List of 3
## $ example:List of 2
## ..$ result: chr [1:50] "<!doctype html>" "<html>" "<head>" " <title>Example Domain</title>" ...
## ..$ error : NULL
## $ rproj :List of 2
## ..$ result: chr [1:124] "<!DOCTYPE html>" "<html lang=\"en\">" " <head>" " <meta charset=\"utf-8\">" ...
## ..$ error : NULL
## $ asdf :List of 2
## ..$ result: NULL
## ..$ error :List of 2
## .. ..$ message: chr "无法打开链结"
## .. ..$ call : language file(con, "r")
## .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
# Extract the result from one of the successful elements
map(html, "result")
## $example
## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 50px;"
## [21] " background-color: #fff;"
## [22] " border-radius: 1em;"
## [23] " }"
## [24] " a:link, a:visited {"
## [25] " color: #38488f;"
## [26] " text-decoration: none;"
## [27] " }"
## [28] " @media (max-width: 700px) {"
## [29] " body {"
## [30] " background-color: #fff;"
## [31] " }"
## [32] " div {"
## [33] " width: auto;"
## [34] " margin: 0 auto;"
## [35] " border-radius: 0;"
## [36] " padding: 1em;"
## [37] " }"
## [38] " }"
## [39] " </style> "
## [40] "</head>"
## [41] ""
## [42] "<body>"
## [43] "<div>"
## [44] " <h1>Example Domain</h1>"
## [45] " <p>This domain is established to be used for illustrative examples in documents. You may use this"
## [46] " domain in examples without prior coordination or asking for permission.</p>"
## [47] " <p><a href=\"http://www.iana.org/domains/example\">More information...</a></p>"
## [48] "</div>"
## [49] "</body>"
## [50] "</html>"
##
## $rproj
## [1] "<!DOCTYPE html>"
## [2] "<html lang=\"en\">"
## [3] " <head>"
## [4] " <meta charset=\"utf-8\">"
## [5] " <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">"
## [6] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
## [7] " <title>R: The R Project for Statistical Computing</title>"
## [8] ""
## [9] " <link rel=\"icon\" type=\"image/png\" href=\"/favicon-32x32.png\" sizes=\"32x32\" />"
## [10] " <link rel=\"icon\" type=\"image/png\" href=\"/favicon-16x16.png\" sizes=\"16x16\" />"
## [11] ""
## [12] " <!-- Bootstrap -->"
## [13] " <link href=\"/css/bootstrap.min.css\" rel=\"stylesheet\">"
## [14] " <link href=\"/css/R.css\" rel=\"stylesheet\">"
## [15] ""
## [16] " <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->"
## [17] " <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->"
## [18] " <!--[if lt IE 9]>"
## [19] " <script src=\"https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js\"></script>"
## [20] " <script src=\"https://oss.maxcdn.com/respond/1.4.2/respond.min.js\"></script>"
## [21] " <![endif]-->"
## [22] " </head>"
## [23] " <body>"
## [24] " <div class=\"container page\">"
## [25] " <div class=\"row\">"
## [26] " <div class=\"col-xs-12 col-sm-offset-1 col-sm-2 sidebar\" role=\"navigation\">"
## [27] "<div class=\"row\">"
## [28] "<div class=\"col-xs-6 col-sm-12\">"
## [29] "<p><a href=\"/\"><img src=\"/Rlogo.png\" width=\"100\" height=\"78\" alt = \"R\" /></a></p>"
## [30] "<p><small><a href=\"/\">[Home]</a></small></p>"
## [31] "<h2 id=\"download\">Download</h2>"
## [32] "<p><a href=\"http://cran.r-project.org/mirrors.html\">CRAN</a></p>"
## [33] "<h2 id=\"r-project\">R Project</h2>"
## [34] "<ul>"
## [35] "<li><a href=\"/about.html\">About R</a></li>"
## [36] "<li><a href=\"/logo/\">Logo</a></li>"
## [37] "<li><a href=\"/contributors.html\">Contributors</a></li>"
## [38] "<li><a href=\"/news.html\">What鈥檚 New?</a></li>"
## [39] "<li><a href=\"/bugs.html\">Reporting Bugs</a></li>"
## [40] "<li><a href=\"/conferences/\">Conferences</a></li>"
## [41] "<li><a href=\"/search.html\">Search</a></li>"
## [42] "<li><a href=\"/mail.html\">Get Involved: Mailing Lists</a></li>"
## [43] "<li><a href=\"http://developer.R-project.org\">Developer Pages</a></li>"
## [44] "<li><a href=\"https://developer.r-project.org/Blog/public/\">R Blog</a></li>"
## [45] "</ul>"
## [46] "</div>"
## [47] "<div class=\"col-xs-6 col-sm-12\">"
## [48] "<h2 id=\"r-foundation\">R Foundation</h2>"
## [49] "<ul>"
## [50] "<li><a href=\"/foundation/\">Foundation</a></li>"
## [51] "<li><a href=\"/foundation/board.html\">Board</a></li>"
## [52] "<li><a href=\"/foundation/members.html\">Members</a></li>"
## [53] "<li><a href=\"/foundation/donors.html\">Donors</a></li>"
## [54] "<li><a href=\"/foundation/donations.html\">Donate</a></li>"
## [55] "</ul>"
## [56] "<h2 id=\"help-with-r\">Help With R</h2>"
## [57] "<ul>"
## [58] "<li><a href=\"/help.html\">Getting Help</a></li>"
## [59] "</ul>"
## [60] "<h2 id=\"documentation\">Documentation</h2>"
## [61] "<ul>"
## [62] "<li><a href=\"http://cran.r-project.org/manuals.html\">Manuals</a></li>"
## [63] "<li><a href=\"http://cran.r-project.org/faqs.html\">FAQs</a></li>"
## [64] "<li><a href=\"http://journal.r-project.org\">The R Journal</a></li>"
## [65] "<li><a href=\"/doc/bib/R-books.html\">Books</a></li>"
## [66] "<li><a href=\"/certification.html\">Certification</a></li>"
## [67] "<li><a href=\"/other-docs.html\">Other</a></li>"
## [68] "</ul>"
## [69] "<h2 id=\"links\">Links</h2>"
## [70] "<ul>"
## [71] "<li><a href=\"http://www.bioconductor.org\">Bioconductor</a></li>"
## [72] "<li><a href=\"/other-projects.html\">Related Projects</a></li>"
## [73] "<li><a href=\"/gsoc.html\">GSoC</a></li>"
## [74] "</ul>"
## [75] "</div>"
## [76] "</div>"
## [77] " </div>"
## [78] " <div class=\"col-xs-12 col-sm-7\">"
## [79] " <h1>The R Project for Statistical Computing</h1>"
## [80] "<h2 id=\"getting-started\">Getting Started</h2>"
## [81] "<p>R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To <strong><a href=\"http://cran.r-project.org/mirrors.html\">download R</a></strong>, please choose your preferred <a href=\"http://cran.r-project.org/mirrors.html\">CRAN mirror</a>.</p>"
## [82] "<p>If you have questions about R like how to download and install the software, or what the license terms are, please read our <a href=\"http://cran.R-project.org/faqs.html\">answers to frequently asked questions</a> before you send an email.</p>"
## [83] "<h2 id=\"news\">News</h2>"
## [84] "<ul>"
## [85] "<li><p><a href=\"https://cran.r-project.org/src/base/R-3\"><strong>R version 3.6.1 (Action of the Toes)</strong></a> has been released on 2019-07-05.</p></li>"
## [86] "<li><p>useR! 2020 will take place in St.聽Louis, Missouri, USA.</p></li>"
## [87] "<li><p><a href=\"https://cran.r-project.org/src/base/R-3\"><strong>R version 3.5.3 (Great Truth)</strong></a> has been released on 2019-03-11.</p></li>"
## [88] "<li><p>The R Foundation Conference Committee has released a <a href=\"https://www.r-project.org/useR-2020_call.html\">call for proposals</a> to host useR! 2020 in North America.</p></li>"
## [89] "<li><p>You can now support the R Foundation with a renewable subscription as a <a href=\"https://www.r-project.org/foundation/donations.html\">supporting member</a></p></li>"
## [90] "<li><p>The R Foundation has been awarded the Personality/Organization of the year 2018 award by the professional association of German market and social researchers.</p></li>"
## [91] "</ul>"
## [92] "<h2 id=\"news-via-twitter\">News via Twitter</h2>"
## [93] "<a class=\"twitter-timeline\""
## [94] " href=\"https://twitter.com/_R_Foundation?ref_src=twsrc%5Etfw\""
## [95] " data-width=\"400\""
## [96] " data-show-replies=\"false\""
## [97] " data-chrome=\"noheader,nofooter,noborders\""
## [98] " data-dnt=\"true\""
## [99] " data-tweet-limit=\"3\">News from the R Foundation</a>"
## [100] "<script async"
## [101] " src=\"https://platform.twitter.com/widgets.js\""
## [102] " charset=\"utf-8\"></script>"
## [103] "<!--- (Boilerplate for release run-in)"
## [104] "- [**R version 3.1.3 (Smooth Sidewalk) prerelease versions**](http://cran.r-project.org/src/base-prerelease) will appear starting February 28. Final release is scheduled for 2015-03-09."
## [105] "-->"
## [106] " </div>"
## [107] " </div>"
## [108] " <div class=\"raw footer\">"
## [109] " © The R Foundation. For queries about this web site, please contact"
## [110] "\t<script type='text/javascript'>"
## [111] "<!--"
## [112] "var s=\"=b!isfg>#nbjmup;xfcnbtufsAs.qspkfdu/psh#?uif!xfcnbtufs=0b?\";"
## [113] "m=\"\"; for (i=0; i<s.length; i++) {if(s.charCodeAt(i) == 28){m+= '&';} else if (s.charCodeAt(i) == 23) {m+= '!';} else {m+=String.fromCharCode(s.charCodeAt(i)-1);}}document.write(m);//-->"
## [114] "\t</script>;"
## [115] " for queries about R itself, please consult the "
## [116] " <a href=\"help.html\">Getting Help</a> section."
## [117] " </div>"
## [118] " </div>"
## [119] " <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->"
## [120] " <script src=\"https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js\"></script>"
## [121] " <!-- Include all compiled plugins (below), or include individual files as needed -->"
## [122] " <script src=\"/js/bootstrap.min.js\"></script>"
## [123] " </body>"
## [124] "</html>"
##
## $asdf
## NULL
# Extract the error from the element that was unsuccessful
map(html, "error")
## $example
## NULL
##
## $rproj
## NULL
##
## $asdf
## <simpleError in file(con, "r"): 无法打开链结>
# Extract the result from one of the successful elements
tmp_res <- map(html, ~.[["result"]])
# Extract the error from the element that was unsuccessful
tmp_errs <- map(html, ~.[["error"]])
还可以反转 list 的结果提取
res <- transpose(html)[["result"]]
# Extract the errors: errs
errs <- transpose(html)[["error"]]
因此这里产生一个 idea
html %>% transpose
html %>% transpose %>% .[["result"]]
html %>% transpose %>% .[["error"]]
这样查看 result 和 error 非常方便。 DataCamp
最后分类,参考DataCamp
# Create a logical vector is_ok
is_ok <- map_lgl(errs, is_null)
# Extract the successful results
res[is_ok]
## $example
## [1] "<!doctype html>"
## [2] "<html>"
## [3] "<head>"
## [4] " <title>Example Domain</title>"
## [5] ""
## [6] " <meta charset=\"utf-8\" />"
## [7] " <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\" />"
## [8] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />"
## [9] " <style type=\"text/css\">"
## [10] " body {"
## [11] " background-color: #f0f0f2;"
## [12] " margin: 0;"
## [13] " padding: 0;"
## [14] " font-family: \"Open Sans\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;"
## [15] " "
## [16] " }"
## [17] " div {"
## [18] " width: 600px;"
## [19] " margin: 5em auto;"
## [20] " padding: 50px;"
## [21] " background-color: #fff;"
## [22] " border-radius: 1em;"
## [23] " }"
## [24] " a:link, a:visited {"
## [25] " color: #38488f;"
## [26] " text-decoration: none;"
## [27] " }"
## [28] " @media (max-width: 700px) {"
## [29] " body {"
## [30] " background-color: #fff;"
## [31] " }"
## [32] " div {"
## [33] " width: auto;"
## [34] " margin: 0 auto;"
## [35] " border-radius: 0;"
## [36] " padding: 1em;"
## [37] " }"
## [38] " }"
## [39] " </style> "
## [40] "</head>"
## [41] ""
## [42] "<body>"
## [43] "<div>"
## [44] " <h1>Example Domain</h1>"
## [45] " <p>This domain is established to be used for illustrative examples in documents. You may use this"
## [46] " domain in examples without prior coordination or asking for permission.</p>"
## [47] " <p><a href=\"http://www.iana.org/domains/example\">More information...</a></p>"
## [48] "</div>"
## [49] "</body>"
## [50] "</html>"
##
## $rproj
## [1] "<!DOCTYPE html>"
## [2] "<html lang=\"en\">"
## [3] " <head>"
## [4] " <meta charset=\"utf-8\">"
## [5] " <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">"
## [6] " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">"
## [7] " <title>R: The R Project for Statistical Computing</title>"
## [8] ""
## [9] " <link rel=\"icon\" type=\"image/png\" href=\"/favicon-32x32.png\" sizes=\"32x32\" />"
## [10] " <link rel=\"icon\" type=\"image/png\" href=\"/favicon-16x16.png\" sizes=\"16x16\" />"
## [11] ""
## [12] " <!-- Bootstrap -->"
## [13] " <link href=\"/css/bootstrap.min.css\" rel=\"stylesheet\">"
## [14] " <link href=\"/css/R.css\" rel=\"stylesheet\">"
## [15] ""
## [16] " <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->"
## [17] " <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->"
## [18] " <!--[if lt IE 9]>"
## [19] " <script src=\"https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js\"></script>"
## [20] " <script src=\"https://oss.maxcdn.com/respond/1.4.2/respond.min.js\"></script>"
## [21] " <![endif]-->"
## [22] " </head>"
## [23] " <body>"
## [24] " <div class=\"container page\">"
## [25] " <div class=\"row\">"
## [26] " <div class=\"col-xs-12 col-sm-offset-1 col-sm-2 sidebar\" role=\"navigation\">"
## [27] "<div class=\"row\">"
## [28] "<div class=\"col-xs-6 col-sm-12\">"
## [29] "<p><a href=\"/\"><img src=\"/Rlogo.png\" width=\"100\" height=\"78\" alt = \"R\" /></a></p>"
## [30] "<p><small><a href=\"/\">[Home]</a></small></p>"
## [31] "<h2 id=\"download\">Download</h2>"
## [32] "<p><a href=\"http://cran.r-project.org/mirrors.html\">CRAN</a></p>"
## [33] "<h2 id=\"r-project\">R Project</h2>"
## [34] "<ul>"
## [35] "<li><a href=\"/about.html\">About R</a></li>"
## [36] "<li><a href=\"/logo/\">Logo</a></li>"
## [37] "<li><a href=\"/contributors.html\">Contributors</a></li>"
## [38] "<li><a href=\"/news.html\">What鈥檚 New?</a></li>"
## [39] "<li><a href=\"/bugs.html\">Reporting Bugs</a></li>"
## [40] "<li><a href=\"/conferences/\">Conferences</a></li>"
## [41] "<li><a href=\"/search.html\">Search</a></li>"
## [42] "<li><a href=\"/mail.html\">Get Involved: Mailing Lists</a></li>"
## [43] "<li><a href=\"http://developer.R-project.org\">Developer Pages</a></li>"
## [44] "<li><a href=\"https://developer.r-project.org/Blog/public/\">R Blog</a></li>"
## [45] "</ul>"
## [46] "</div>"
## [47] "<div class=\"col-xs-6 col-sm-12\">"
## [48] "<h2 id=\"r-foundation\">R Foundation</h2>"
## [49] "<ul>"
## [50] "<li><a href=\"/foundation/\">Foundation</a></li>"
## [51] "<li><a href=\"/foundation/board.html\">Board</a></li>"
## [52] "<li><a href=\"/foundation/members.html\">Members</a></li>"
## [53] "<li><a href=\"/foundation/donors.html\">Donors</a></li>"
## [54] "<li><a href=\"/foundation/donations.html\">Donate</a></li>"
## [55] "</ul>"
## [56] "<h2 id=\"help-with-r\">Help With R</h2>"
## [57] "<ul>"
## [58] "<li><a href=\"/help.html\">Getting Help</a></li>"
## [59] "</ul>"
## [60] "<h2 id=\"documentation\">Documentation</h2>"
## [61] "<ul>"
## [62] "<li><a href=\"http://cran.r-project.org/manuals.html\">Manuals</a></li>"
## [63] "<li><a href=\"http://cran.r-project.org/faqs.html\">FAQs</a></li>"
## [64] "<li><a href=\"http://journal.r-project.org\">The R Journal</a></li>"
## [65] "<li><a href=\"/doc/bib/R-books.html\">Books</a></li>"
## [66] "<li><a href=\"/certification.html\">Certification</a></li>"
## [67] "<li><a href=\"/other-docs.html\">Other</a></li>"
## [68] "</ul>"
## [69] "<h2 id=\"links\">Links</h2>"
## [70] "<ul>"
## [71] "<li><a href=\"http://www.bioconductor.org\">Bioconductor</a></li>"
## [72] "<li><a href=\"/other-projects.html\">Related Projects</a></li>"
## [73] "<li><a href=\"/gsoc.html\">GSoC</a></li>"
## [74] "</ul>"
## [75] "</div>"
## [76] "</div>"
## [77] " </div>"
## [78] " <div class=\"col-xs-12 col-sm-7\">"
## [79] " <h1>The R Project for Statistical Computing</h1>"
## [80] "<h2 id=\"getting-started\">Getting Started</h2>"
## [81] "<p>R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To <strong><a href=\"http://cran.r-project.org/mirrors.html\">download R</a></strong>, please choose your preferred <a href=\"http://cran.r-project.org/mirrors.html\">CRAN mirror</a>.</p>"
## [82] "<p>If you have questions about R like how to download and install the software, or what the license terms are, please read our <a href=\"http://cran.R-project.org/faqs.html\">answers to frequently asked questions</a> before you send an email.</p>"
## [83] "<h2 id=\"news\">News</h2>"
## [84] "<ul>"
## [85] "<li><p><a href=\"https://cran.r-project.org/src/base/R-3\"><strong>R version 3.6.1 (Action of the Toes)</strong></a> has been released on 2019-07-05.</p></li>"
## [86] "<li><p>useR! 2020 will take place in St.聽Louis, Missouri, USA.</p></li>"
## [87] "<li><p><a href=\"https://cran.r-project.org/src/base/R-3\"><strong>R version 3.5.3 (Great Truth)</strong></a> has been released on 2019-03-11.</p></li>"
## [88] "<li><p>The R Foundation Conference Committee has released a <a href=\"https://www.r-project.org/useR-2020_call.html\">call for proposals</a> to host useR! 2020 in North America.</p></li>"
## [89] "<li><p>You can now support the R Foundation with a renewable subscription as a <a href=\"https://www.r-project.org/foundation/donations.html\">supporting member</a></p></li>"
## [90] "<li><p>The R Foundation has been awarded the Personality/Organization of the year 2018 award by the professional association of German market and social researchers.</p></li>"
## [91] "</ul>"
## [92] "<h2 id=\"news-via-twitter\">News via Twitter</h2>"
## [93] "<a class=\"twitter-timeline\""
## [94] " href=\"https://twitter.com/_R_Foundation?ref_src=twsrc%5Etfw\""
## [95] " data-width=\"400\""
## [96] " data-show-replies=\"false\""
## [97] " data-chrome=\"noheader,nofooter,noborders\""
## [98] " data-dnt=\"true\""
## [99] " data-tweet-limit=\"3\">News from the R Foundation</a>"
## [100] "<script async"
## [101] " src=\"https://platform.twitter.com/widgets.js\""
## [102] " charset=\"utf-8\"></script>"
## [103] "<!--- (Boilerplate for release run-in)"
## [104] "- [**R version 3.1.3 (Smooth Sidewalk) prerelease versions**](http://cran.r-project.org/src/base-prerelease) will appear starting February 28. Final release is scheduled for 2015-03-09."
## [105] "-->"
## [106] " </div>"
## [107] " </div>"
## [108] " <div class=\"raw footer\">"
## [109] " © The R Foundation. For queries about this web site, please contact"
## [110] "\t<script type='text/javascript'>"
## [111] "<!--"
## [112] "var s=\"=b!isfg>#nbjmup;xfcnbtufsAs.qspkfdu/psh#?uif!xfcnbtufs=0b?\";"
## [113] "m=\"\"; for (i=0; i<s.length; i++) {if(s.charCodeAt(i) == 28){m+= '&';} else if (s.charCodeAt(i) == 23) {m+= '!';} else {m+=String.fromCharCode(s.charCodeAt(i)-1);}}document.write(m);//-->"
## [114] "\t</script>;"
## [115] " for queries about R itself, please consult the "
## [116] " <a href=\"help.html\">Getting Help</a> section."
## [117] " </div>"
## [118] " </div>"
## [119] " <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->"
## [120] " <script src=\"https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js\"></script>"
## [121] " <!-- Include all compiled plugins (below), or include individual files as needed -->"
## [122] " <script src=\"/js/bootstrap.min.js\"></script>"
## [123] " </body>"
## [124] "</html>"
# Extract the input from the unsuccessful results
urls[!is_ok]
## $asdf
## [1] "http://asdfasdasdkfjlda"
4.15 invoke_map() to iterate over functions
参考 Wickham and Wickham (2016)
map 函数主要是让元素批量执行一个函数,类似于 for 循环。
invoke_map
可以让一个元素执行多个函数。
args(invoke_map)
## function (.f, .x = list(NULL), ..., .env = NULL)
## NULL
# Define list of functions
funs <- list("rnorm", "runif", "rexp")
# Parameter list for rnorm()
rnorm_params <- list(mean = 10)
# Add a min element with value 0 and max element with value 5
runif_params <- list(min = 0, max = 5)
# Add a rate element with value 5
rexp_params <- list(rate = 5)
# Define params for each function
params <- list(
rnorm_params,
runif_params,
rexp_params
)
# Call invoke_map() on f supplying params as the second argument
invoke_map(funs, params, n = 5)
## [[1]]
## [1] 9.407010 9.209079 9.866699 10.141528 9.360198
##
## [[2]]
## [1] 2.122866 1.672456 3.072335 4.956065 3.410839
##
## [[3]]
## [1] 0.324964921 0.059872956 0.076464162 0.006012374 0.166175682
.x
, a list of argument-lists,把其他参数放入。
4.16 safely 对象化编程
suppressMessages(library(tidyverse))
参考 Wickham and Wickham (2016)
前面我们注意
- 函数名用动词
- 参数名用名词
- 这里的处理函数对象化的函数,一般用副词;并且可以像副词一样添加,参考 Reference
map(long_list, safely(log))
log 作为一个 参数,可以直接 input 到 safely 函数中。
同时,safely(log)
作为一个对象,可以对象化编程。
safe_log <- safely(log)
这就类似于函数可以对象化一样,参考DataCamp。
(function(x) {x^2})(2)
## [1] 4
safely(as.numeric)("It is not numeric")
## $result
## [1] NA
##
## $error
## NULL
另外这个函数也帮助了在可视化文档中展示报错。
4.17 Walk functions
参考 Wickham and Wickham (2016)
walk()
operates just likemap()
except it’s designed for functions that don’t return anything. You usewalk()
for functions with side effects like printing, plotting or saving.
- 比如批量下载,就应该用这个函数。
- 比如批量重命名
- 比如批量作图,参考 Reference
suppressMessages(library(tidyverse))
# Define list of functions
funs <- list(Normal = "rnorm", Uniform = "runif", Exp = "rexp")
# Define params
params <- list(
Normal = list(mean = 10),
Uniform = list(min = 0, max = 5),
Exp = list(rate = 5)
)
# Assign the simulated samples to sims
sims <- invoke_map(funs, params, n = 50)
# Use walk() to make a histogram of each element in sims
walk(sims,hist)
也可以用 map
map(sims,hist)
## $Normal
## $breaks
## [1] 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0
##
## $counts
## [1] 1 0 1 4 7 12 11 5 6 3
##
## $density
## [1] 0.04 0.00 0.04 0.16 0.28 0.48 0.44 0.20 0.24 0.12
##
## $mids
## [1] 7.25 7.75 8.25 8.75 9.25 9.75 10.25 10.75 11.25 11.75
##
## $xname
## [1] ".x[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $Uniform
## $breaks
## [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 8 4 7 7 3 1 5 6 3 6
##
## $density
## [1] 0.32 0.16 0.28 0.28 0.12 0.04 0.20 0.24 0.12 0.24
##
## $mids
## [1] 0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] ".x[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $Exp
## $breaks
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
##
## $counts
## [1] 15 13 8 7 4 2 0 1
##
## $density
## [1] 3.0 2.6 1.6 1.4 0.8 0.4 0.0 0.2
##
## $mids
## [1] 0.05 0.15 0.25 0.35 0.45 0.55 0.65 0.75
##
## $xname
## [1] ".x[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
但是要产生不必要的日志。
# Replace "Sturges" with reasonable breaks for each sample
breaks_list <- list(
Normal = seq(6, 16, 0.5),
Uniform = seq(0, 5, 0.25),
Exp = seq(0, 1.5, 0.1)
)
# Use walk2() to make histograms with the right breaks
walk2(sims,breaks_list,hist)
分类分的不好。
# Turn this snippet into find_breaks()
find_breaks <- function(x) {
rng <- range(x, na.rm = TRUE)
seq(rng[1], rng[2], length.out = 30)
}
# Call find_breaks() on sims[[1]]
find_breaks(sims[[1]])
## [1] 7.247921 7.404957 7.561994 7.719030 7.876067 8.033103 8.190139
## [8] 8.347176 8.504212 8.661249 8.818285 8.975321 9.132358 9.289394
## [15] 9.446431 9.603467 9.760503 9.917540 10.074576 10.231612 10.388649
## [22] 10.545685 10.702722 10.859758 11.016794 11.173831 11.330867 11.487904
## [29] 11.644940 11.801976
# Use map() to iterate find_breaks() over sims: nice_breaks
nice_breaks <- map(sims,find_breaks)
# Use nice_breaks as the second argument to walk2()
walk2(sims,nice_breaks,hist)
# Increase sample size to 1000
sims <- invoke_map(funs, params, n = 1000)
# Compute nice_breaks (don't change this)
nice_breaks <- map(sims, find_breaks)
# Create a vector nice_titles
nice_titles <- c("Normal(10, 1)", "Uniform(0, 5)", "Exp(5)")
# Use pwalk() instead of walk2()
pwalk(list(x = sims, breaks = nice_breaks, main = nice_titles), hist, xlab = "")
4.18 Pipiline
参考DataCamp
sims %>%
walk(hist) %>%
map(summary)
## $Normal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.410 9.417 10.022 10.013 10.687 12.684
##
## $Uniform
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.008928 1.288595 2.522302 2.534203 3.807468 4.999127
##
## $Exp
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0002618 0.0540249 0.1353825 0.1955224 0.2664505 1.2375382
4.19 使用 [
加入参数drop = FALSE
参考 Wickham and Wickham (2016)
df <- data.frame(z = 1:3)
str(df)
## 'data.frame': 3 obs. of 1 variable:
## $ z: int 1 2 3
str(df[1,])
## int 1
数据类型改变了。
str(df[1,,drop=F])
## 'data.frame': 1 obs. of 1 variable:
## $ z: int 1
4.20 Non-standard evaluation definition
参考 Wickham and Wickham (2016)
例如
library(magrittr)
subset(mtcars,mpg > 7) %>% head
而不是
subset(mtcars, mtcars$mpg > 7) %>% head
这样节约了时间,但是只有利于数据分析,而不利于编程。
todo 总结博客内的,imp_rmd 内的。
4.21 function stopifnot
参考 Wickham and Wickham (2016)
# Define troublesome x and y
x <- c(NA, NA, NA)
y <- c( 1, NA, NA, NA)
both_na <- function(x, y) {
# Add stopifnot() to check length of x and y
stopifnot(length(x) == length(y))
sum(is.na(x) & is.na(y))
}
# Call both_na() on x and y
library(purrr)
safely(both_na)(x, y)
## $result
## NULL
##
## $error
## <simpleError in .f(...): length(x) == length(y) is not TRUE>
前后类似,stopifnot
是快捷方式的功能。
both_na <- function(x, y) {
# Add stopifnot() to check length of x and y
if (length(x) != length(y)) {
stop("length(x) == length(y) is not TRUE")
}
sum(is.na(x) & is.na(y))
}
# Call both_na() on x and y
safely(both_na)(x, y)
## $result
## NULL
##
## $error
## <simpleError in .f(...): length(x) == length(y) is not TRUE>
5 group_ 函数
group_modify
最早出现在 www.tidyverse.org。
library(tidyverse)
iris %>%
group_by(Species) %>%
group_modify( ~ {
quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>%
tibble::enframe(name = "prob", value = "quantile")
})
参考 speakerdeck.com 从 pandas 角度理解,
iris %>%
group_by(Species)
以后,
- Split data in groups
- Apply something for each group
产生的对象是 (group, keys)
fun <- function(group, keys){
quantile(group$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>%
tibble::enframe(name = "prob", value = "quantile")
}
iris %>%
group_by(Species) %>%
group_modify(fun)
再举一个例子,进行批量回归。
fun2 <- function(group, keys) {
lm(Petal.Length ~ Sepal.Length, data = group) %>%
broom::tidy()
}
iris %>%
group_by(Species) %>%
group_modify(fun2)
iris %>%
group_by(Species) %>%
group_map(fun2) # 反馈 list 结构
## [[1]]
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.803 0.344 2.34 0.0238
## 2 Sepal.Length 0.132 0.0685 1.92 0.0607
##
## [[2]]
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.185 0.514 0.360 7.20e- 1
## 2 Sepal.Length 0.686 0.0863 7.95 2.59e-10
##
## [[3]]
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.610 0.417 1.46 1.50e- 1
## 2 Sepal.Length 0.750 0.0630 11.9 6.30e-16
iris %>%
group_by(Species) %>%
group_map(fun2) %>% # 反馈 list 结构
bind_rows()
fun3 <- function(group, keys) {
lm(Petal.Length ~ Sepal.Length, data = group) %>%
broom::tidy() %>%
mutate(Species = keys$Species)
}
iris %>%
group_by(Species) %>%
group_map(fun3) %>% # 反馈 list 结构
bind_rows()
提取
- group
- keys
- row index
iris %>%
group_by(Species) %>%
group_data()
iris %>%
group_by(Species) %>%
group_keys()
iris %>%
group_by(Species) %>%
group_rows()
## [[1]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50
##
## [[2]]
## [1] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
## [18] 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
## [35] 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
##
## [[3]]
## [1] 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
## [18] 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
## [35] 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
FAY, Colin. 2019. “Intermediate Functional Programming with Purrr.” 2019. https://www.datacamp.com/courses/intermediate-functional-programming-with-purrr.
Fournier, Auriel. 2018. “Foundations of Functional Programming with Purrr.” 2018. https://www.datacamp.com/courses/foundations-of-functional-programming-with-purrr.
Ghosh, Saurav. 2019. “Functional Programming with Purrr.” GitHub. 2019. https://github.com/sauravg94/purrr-workshop.
Wickham, Hadley, and Charlotte Wickham. 2016. “Writing Functions in R.” 2016. https://www.datacamp.com/courses/writing-functions-in-r.