skimr
Version:
CLI EDA for CSVs
871 lines (835 loc) • 36.9 kB
HTML
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>purrr &lt;-&gt; base R</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(event) {
  // Matches the tag names H1 through H6, case-insensitively.
  var headingTag = /^h[1-6]$/i;
  // First child of every div.section whose class attribute contains "level".
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only header elements are touched; anything else is left alone.
    if (!headingTag.test(node.tagName)) return;
    // Always drop the first remaining attribute until none are left.
    var attrs = node.attributes;
    while (attrs.length > 0) node.removeAttribute(attrs[0].name);
  });
});
</script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  var allSheets = document.styleSheets;
  for (var s = 0; s < allSheets.length; s++) {
    var sheet = allSheets[s];
    // Only the stylesheet tagged data-origin="pandoc" is rewritten.
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules may throw; such sheets are skipped.
    var ruleList;
    try { ruleList = sheet.cssRules; } catch (err) { continue; }
    for (var k = 0; k < ruleList.length; k++) {
      var current = ruleList[k];
      // Look for a plain style rule whose selector is exactly div.sourceCode.
      var isDivSourceCode =
        current.type === current.STYLE_RULE &&
        current.selectorText === "div.sourceCode";
      if (!isDivSourceCode) continue;
      var declarations = current.style;
      var cssText = declarations.cssText;
      // Leave the rule alone unless it sets color or background-color.
      var setsColour = declarations.color !== '' || declarations.backgroundColor !== '';
      if (!setsColour) continue;
      // Swap the rule in place for an equivalent pre.sourceCode rule.
      sheet.deleteRule(k);
      sheet.insertRule('pre.sourceCode{' + cssText + '}', k);
    }
  }
})();
</script>
<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: drawn as a single 1px top border; the other three
   borders are removed by the `border: none` shorthand. */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white background with 2px padding inside a thin, rounded grey
   border. Only the border colour that actually took effect is kept. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">purrr &lt;-&gt; base R</h1>
<div id="introduction" class="section level1">
<h1>Introduction</h1>
<p>This vignette compares purrr’s functionals to their base R
equivalents, focusing primarily on the map family and related functions.
This helps those familiar with base R understand better what purrr does,
and shows purrr users how you might express the same ideas in base R
code. We’ll start with a rough overview of the major differences, give a
rough translation guide, and then show a few examples.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(purrr)</span>
<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a><span class="fu">library</span>(tibble)</span></code></pre></div>
<div id="key-differences" class="section level2">
<h2>Key differences</h2>
<p>There are two primary differences between the base apply family and
the purrr map family: purrr functions are named more consistently, and
more fully explore the space of input and output variants.</p>
<ul>
<li><p>purrr functions consistently use <code>.</code> as prefix to
avoid <a href="https://adv-r.hadley.nz/functionals.html#argument-names">inadvertently
matching arguments</a> of the purrr function, instead of the function
that you’re trying to call. Base functions use a variety of techniques
including upper case (e.g. <code>lapply(X, FUN, ...)</code>) or require
anonymous functions (e.g. <code>Map()</code>).</p></li>
<li><p>All map functions are type stable: you can predict the type of
the output using little information about the inputs. In contrast, the
base functions <code>sapply()</code> and <code>mapply()</code>
automatically simplify, making the return value hard to predict.</p></li>
<li><p>The map functions all start with the data, followed by the
function, then any additional constant arguments. Most base apply
functions also follow this pattern, but <code>mapply()</code> starts
with the function, and <code>Map()</code> has no way to supply
additional constant arguments.</p></li>
<li><p>purrr functions provide all combinations of input and output
variants, and include variants specifically for the common two argument
case.</p></li>
</ul>
</div>
<div id="direct-translations" class="section level2">
<h2>Direct translations</h2>
<p>The following sections give a high-level translation between base R
commands and their purrr equivalents. See function documentation for the
details.</p>
<div id="map-functions" class="section level3">
<h3><code>Map</code> functions</h3>
<p>Here <code>x</code> denotes a vector and <code>f</code> denotes a
function.</p>
<table>
<colgroup>
<col width="24%" />
<col width="24%" />
<col width="24%" />
<col width="26%" />
</colgroup>
<thead>
<tr class="header">
<th>Output</th>
<th>Input</th>
<th>Base R</th>
<th>purrr</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>List</td>
<td>1 vector</td>
<td><code>lapply()</code></td>
<td><code>map()</code></td>
</tr>
<tr class="even">
<td>List</td>
<td>2 vectors</td>
<td><code>mapply()</code>, <code>Map()</code></td>
<td><code>map2()</code></td>
</tr>
<tr class="odd">
<td>List</td>
<td>>2 vectors</td>
<td><code>mapply()</code>, <code>Map()</code></td>
<td><code>pmap()</code></td>
</tr>
<tr class="even">
<td>Atomic vector of desired type</td>
<td>1 vector</td>
<td><code>vapply()</code></td>
<td><code>map_lgl()</code> (logical), <code>map_int()</code> (integer),
<code>map_dbl()</code> (double), <code>map_chr()</code> (character),
<code>map_raw()</code> (raw)</td>
</tr>
<tr class="odd">
<td>Atomic vector of desired type</td>
<td>2 vectors</td>
<td><code>mapply()</code>, <code>Map()</code>, then <code>is.*()</code>
to check type</td>
<td><code>map2_lgl()</code> (logical), <code>map2_int()</code>
(integer), <code>map2_dbl()</code> (double), <code>map2_chr()</code>
(character), <code>map2_raw()</code> (raw)</td>
</tr>
<tr class="even">
<td>Atomic vector of desired type</td>
<td>>2 vectors</td>
<td><code>mapply()</code>, <code>Map()</code>, then <code>is.*()</code>
to check type</td>
<td><code>pmap_lgl()</code> (logical), <code>pmap_int()</code>
(integer), <code>pmap_dbl()</code> (double), <code>pmap_chr()</code>
(character), <code>pmap_raw()</code> (raw)</td>
</tr>
<tr class="odd">
<td>Side effect only</td>
<td>1 vector</td>
<td>loops</td>
<td><code>walk()</code></td>
</tr>
<tr class="even">
<td>Side effect only</td>
<td>2 vectors</td>
<td>loops</td>
<td><code>walk2()</code></td>
</tr>
<tr class="odd">
<td>Side effect only</td>
<td>>2 vectors</td>
<td>loops</td>
<td><code>pwalk()</code></td>
</tr>
<tr class="even">
<td>Data frame (<code>rbind</code> outputs)</td>
<td>1 vector</td>
<td><code>lapply()</code> then <code>rbind()</code></td>
<td><code>map_dfr()</code></td>
</tr>
<tr class="odd">
<td>Data frame (<code>rbind</code> outputs)</td>
<td>2 vectors</td>
<td><code>mapply()</code>/<code>Map()</code> then
<code>rbind()</code></td>
<td><code>map2_dfr()</code></td>
</tr>
<tr class="even">
<td>Data frame (<code>rbind</code> outputs)</td>
<td>>2 vectors</td>
<td><code>mapply()</code>/<code>Map()</code> then
<code>rbind()</code></td>
<td><code>pmap_dfr()</code></td>
</tr>
<tr class="odd">
<td>Data frame (<code>cbind</code> outputs)</td>
<td>1 vector</td>
<td><code>lapply()</code> then <code>cbind()</code></td>
<td><code>map_dfc()</code></td>
</tr>
<tr class="even">
<td>Data frame (<code>cbind</code> outputs)</td>
<td>2 vectors</td>
<td><code>mapply()</code>/<code>Map()</code> then
<code>cbind()</code></td>
<td><code>map2_dfc()</code></td>
</tr>
<tr class="odd">
<td>Data frame (<code>cbind</code> outputs)</td>
<td>>2 vectors</td>
<td><code>mapply()</code>/<code>Map()</code> then
<code>cbind()</code></td>
<td><code>pmap_dfc()</code></td>
</tr>
<tr class="even">
<td>Any</td>
<td>Vector and its names</td>
<td><code>l/s/vapply(X, function(x) f(x, names(x)))</code> or
<code>mapply/Map(f, x, names(x))</code></td>
<td><code>imap()</code>, <code>imap_*()</code> (<code>lgl</code>,
<code>dbl</code>, <code>dfr</code>, etc., just like for
<code>map()</code>, <code>map2()</code>, and <code>pmap()</code>)</td>
</tr>
<tr class="odd">
<td>Any</td>
<td>Selected elements of the vector</td>
<td><code>l/s/vapply(X[index], FUN, ...)</code></td>
<td><code>map_if()</code>, <code>map_at()</code></td>
</tr>
<tr class="even">
<td>List</td>
<td>Recursively apply to list within list</td>
<td><code>rapply()</code></td>
<td><code>map_depth()</code></td>
</tr>
<tr class="odd">
<td>List</td>
<td>List only</td>
<td><code>lapply()</code></td>
<td><code>lmap()</code>, <code>lmap_at()</code>,
<code>lmap_if()</code></td>
</tr>
</tbody>
</table>
</div>
<div id="extractor-shorthands" class="section level3">
<h3>Extractor shorthands</h3>
<p>Since a common use case for map functions is extracting list
components, purrr provides a handful of shortcut functions for various
uses of <code>[[</code>.</p>
<table>
<colgroup>
<col width="26%" />
<col width="36%" />
<col width="37%" />
</colgroup>
<thead>
<tr class="header">
<th>Input</th>
<th>base R</th>
<th>purrr</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Extract by name</td>
<td><code>lapply(x, `[[`, "a")</code></td>
<td><code>map(x, "a")</code></td>
</tr>
<tr class="even">
<td>Extract by position</td>
<td><code>lapply(x, `[[`, 3)</code></td>
<td><code>map(x, 3)</code></td>
</tr>
<tr class="odd">
<td>Extract deeply</td>
<td><code>lapply(x, \(y) y[[1]][["x"]][[3]])</code></td>
<td><code>map(x, list(1, "x", 3))</code></td>
</tr>
<tr class="even">
<td>Extract with default value</td>
<td><code>lapply(x, function(y) tryCatch(y[[3]], error = function(e) NA))</code></td>
<td><code>map(x, 3, .default = NA)</code></td>
</tr>
</tbody>
</table>
</div>
<div id="predicates" class="section level3">
<h3>Predicates</h3>
<p>Here <code>p</code>, a predicate, denotes a function that returns
<code>TRUE</code> or <code>FALSE</code> indicating whether an object
fulfills a criterion, e.g. <code>is.character()</code>.</p>
<table>
<colgroup>
<col width="40%" />
<col width="27%" />
<col width="31%" />
</colgroup>
<thead>
<tr class="header">
<th>Description</th>
<th>base R</th>
<th>purrr</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Find a matching element</td>
<td><code>Find(p, x)</code></td>
<td><code>detect(x, p)</code></td>
</tr>
<tr class="even">
<td>Find position of matching element</td>
<td><code>Position(p, x)</code></td>
<td><code>detect_index(x, p)</code></td>
</tr>
<tr class="odd">
<td>Do all elements of a vector satisfy a predicate?</td>
<td><code>all(sapply(x, p))</code></td>
<td><code>every(x, p)</code></td>
</tr>
<tr class="even">
<td>Do any elements of a vector satisfy a predicate?</td>
<td><code>any(sapply(x, p))</code></td>
<td><code>some(x, p)</code></td>
</tr>
<tr class="odd">
<td>Does a list contain an object?</td>
<td><code>any(sapply(x, identical, obj))</code></td>
<td><code>has_element(x, obj)</code></td>
</tr>
<tr class="even">
<td>Keep elements that satisfy a predicate</td>
<td><code>x[sapply(x, p)]</code></td>
<td><code>keep(x, p)</code></td>
</tr>
<tr class="odd">
<td>Discard elements that satisfy a predicate</td>
<td><code>x[!sapply(x, p)]</code></td>
<td><code>discard(x, p)</code></td>
</tr>
<tr class="even">
<td>Negate a predicate function</td>
<td><code>function(x) !p(x)</code></td>
<td><code>negate(p)</code></td>
</tr>
</tbody>
</table>
</div>
<div id="other-vector-transforms" class="section level3">
<h3>Other vector transforms</h3>
<table>
<colgroup>
<col width="40%" />
<col width="27%" />
<col width="31%" />
</colgroup>
<thead>
<tr class="header">
<th>Description</th>
<th>base R</th>
<th>purrr</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Accumulate intermediate results of a vector reduction</td>
<td><code>Reduce(f, x, accumulate = TRUE)</code></td>
<td><code>accumulate(x, f)</code></td>
</tr>
<tr class="even">
<td>Recursively combine two lists</td>
<td><code>c(X, Y)</code>, but more complicated to merge recursively</td>
<td><code>list_merge()</code>, <code>list_modify()</code></td>
</tr>
<tr class="odd">
<td>Reduce a list to a single value by iteratively applying a binary
function</td>
<td><code>Reduce(f, x)</code></td>
<td><code>reduce(x, f)</code></td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="examples" class="section level2">
<h2>Examples</h2>
<div id="varying-inputs" class="section level3">
<h3>Varying inputs</h3>
<div id="one-input" class="section level4">
<h4>One input</h4>
<p>Suppose we would like to generate a list of samples of 5 from normal
distributions with different means:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>means <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">4</span></span></code></pre></div>
<p>There’s little difference when generating the samples:</p>
<ul>
<li><p>Base R uses <code>lapply()</code>:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">lapply</span>(means, rnorm, <span class="at">n =</span> <span class="dv">5</span>, <span class="at">sd =</span> <span class="dv">1</span>)</span>
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a><span class="co">#> $ : num [1:5] 1.377 1.302 -0.098 -0.13 -1.797</span></span>
<span id="cb3-6"><a href="#cb3-6" tabindex="-1"></a><span class="co">#> $ : num [1:5] 2.72 2.94 1.77 3.76 2.12</span></span>
<span id="cb3-7"><a href="#cb3-7" tabindex="-1"></a><span class="co">#> $ : num [1:5] 2.15 3.91 4.2 2.63 2.88</span></span>
<span id="cb3-8"><a href="#cb3-8" tabindex="-1"></a><span class="co">#> $ : num [1:5] 5.8 5.704 0.961 1.711 4.058</span></span></code></pre></div></li>
<li><p>purrr uses <code>map()</code>:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">map</span>(means, rnorm, <span class="at">n =</span> <span class="dv">5</span>, <span class="at">sd =</span> <span class="dv">1</span>)</span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb4-5"><a href="#cb4-5" tabindex="-1"></a><span class="co">#> $ : num [1:5] 1.377 1.302 -0.098 -0.13 -1.797</span></span>
<span id="cb4-6"><a href="#cb4-6" tabindex="-1"></a><span class="co">#> $ : num [1:5] 2.72 2.94 1.77 3.76 2.12</span></span>
<span id="cb4-7"><a href="#cb4-7" tabindex="-1"></a><span class="co">#> $ : num [1:5] 2.15 3.91 4.2 2.63 2.88</span></span>
<span id="cb4-8"><a href="#cb4-8" tabindex="-1"></a><span class="co">#> $ : num [1:5] 5.8 5.704 0.961 1.711 4.058</span></span></code></pre></div></li>
</ul>
</div>
<div id="two-inputs" class="section level4">
<h4>Two inputs</h4>
<p>Let’s make the example a little more complicated by also varying the
standard deviations:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>means <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">4</span></span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a>sds <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">4</span></span></code></pre></div>
<ul>
<li><p>This is relatively tricky in base R because we have to adjust a
number of <code>mapply()</code>’s defaults.</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">mapply</span>(</span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a> rnorm, </span>
<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a> <span class="at">mean =</span> means, </span>
<span id="cb6-5"><a href="#cb6-5" tabindex="-1"></a> <span class="at">sd =</span> sds, </span>
<span id="cb6-6"><a href="#cb6-6" tabindex="-1"></a> <span class="at">MoreArgs =</span> <span class="fu">list</span>(<span class="at">n =</span> <span class="dv">5</span>), </span>
<span id="cb6-7"><a href="#cb6-7" tabindex="-1"></a> <span class="at">SIMPLIFY =</span> <span class="cn">FALSE</span></span>
<span id="cb6-8"><a href="#cb6-8" tabindex="-1"></a>)</span>
<span id="cb6-9"><a href="#cb6-9" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb6-10"><a href="#cb6-10" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb6-11"><a href="#cb6-11" tabindex="-1"></a><span class="co">#> $ : num [1:5] 1.377 1.302 -0.098 -0.13 -1.797</span></span>
<span id="cb6-12"><a href="#cb6-12" tabindex="-1"></a><span class="co">#> $ : num [1:5] 3.44 3.88 1.54 5.52 2.23</span></span>
<span id="cb6-13"><a href="#cb6-13" tabindex="-1"></a><span class="co">#> $ : num [1:5] 0.441 5.728 6.589 1.885 2.63</span></span>
<span id="cb6-14"><a href="#cb6-14" tabindex="-1"></a><span class="co">#> $ : num [1:5] 11.2 10.82 -8.16 -5.16 4.23</span></span></code></pre></div>
<p>Alternatively, we could use <code>Map()</code> which doesn’t simplify,
but also doesn’t take any constant arguments, so we need to use an
anonymous function:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">Map</span>(<span class="cf">function</span>(...) <span class="fu">rnorm</span>(..., <span class="at">n =</span> <span class="dv">5</span>), <span class="at">mean =</span> means, <span class="at">sd =</span> sds)</span></code></pre></div>
<p>In R 4.1 and up, you could use the shorter anonymous function
form:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">Map</span>(\(...) <span class="fu">rnorm</span>(..., <span class="at">n =</span> <span class="dv">5</span>), <span class="at">mean =</span> means, <span class="at">sd =</span> sds)</span></code></pre></div></li>
<li><p>Working with a pair of vectors is a common situation so purrr
provides the <code>map2()</code> family of functions:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">map2</span>(means, sds, rnorm, <span class="at">n =</span> <span class="dv">5</span>)</span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb9-5"><a href="#cb9-5" tabindex="-1"></a><span class="co">#> $ : num [1:5] 1.377 1.302 -0.098 -0.13 -1.797</span></span>
<span id="cb9-6"><a href="#cb9-6" tabindex="-1"></a><span class="co">#> $ : num [1:5] 3.44 3.88 1.54 5.52 2.23</span></span>
<span id="cb9-7"><a href="#cb9-7" tabindex="-1"></a><span class="co">#> $ : num [1:5] 0.441 5.728 6.589 1.885 2.63</span></span>
<span id="cb9-8"><a href="#cb9-8" tabindex="-1"></a><span class="co">#> $ : num [1:5] 11.2 10.82 -8.16 -5.16 4.23</span></span></code></pre></div></li>
</ul>
</div>
<div id="any-number-of-inputs" class="section level4">
<h4>Any number of inputs</h4>
<p>We can make the challenge still more complex by also varying the
number of samples:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a>ns <span class="ot"><-</span> <span class="dv">4</span><span class="sc">:</span><span class="dv">1</span></span></code></pre></div>
<ul>
<li><p>Using base R’s <code>Map()</code> becomes more straightforward
because there are no constant arguments.</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">Map</span>(rnorm, <span class="at">mean =</span> means, <span class="at">sd =</span> sds, <span class="at">n =</span> ns)</span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="co">#> $ : num [1:4] 1.377 1.302 -0.098 -0.13</span></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="co">#> $ : num [1:3] -3.59 3.44 3.88</span></span>
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a><span class="co">#> $ : num [1:2] 2.31 8.28</span></span>
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co">#> $ : num 4.47</span></span></code></pre></div></li>
<li><p>In purrr, we need to switch from <code>map2()</code> to
<code>pmap()</code> which takes a list of any number of arguments.</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a>samples <span class="ot"><-</span> <span class="fu">pmap</span>(<span class="fu">list</span>(<span class="at">mean =</span> means, <span class="at">sd =</span> sds, <span class="at">n =</span> ns), rnorm)</span>
<span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a><span class="fu">str</span>(samples)</span>
<span id="cb12-4"><a href="#cb12-4" tabindex="-1"></a><span class="co">#> List of 4</span></span>
<span id="cb12-5"><a href="#cb12-5" tabindex="-1"></a><span class="co">#> $ : num [1:4] 1.377 1.302 -0.098 -0.13</span></span>
<span id="cb12-6"><a href="#cb12-6" tabindex="-1"></a><span class="co">#> $ : num [1:3] -3.59 3.44 3.88</span></span>
<span id="cb12-7"><a href="#cb12-7" tabindex="-1"></a><span class="co">#> $ : num [1:2] 2.31 8.28</span></span>
<span id="cb12-8"><a href="#cb12-8" tabindex="-1"></a><span class="co">#> $ : num 4.47</span></span></code></pre></div></li>
</ul>
</div>
</div>
<div id="outputs" class="section level3">
<h3>Outputs</h3>
<p>Given the samples, imagine we want to compute their medians. A median is
a single number, so we want the output to be a numeric vector rather
than a list.</p>
<ul>
<li><p>There are two options in base R: <code>vapply()</code> or
<code>sapply()</code>. <code>vapply()</code> requires you to specify
the output type (so is relatively verbose), but will always return a
numeric vector. <code>sapply()</code> is concise, but if you supply an
empty list you’ll get a list instead of a numeric vector.</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="co"># type stable</span></span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>medians <span class="ot"><-</span> <span class="fu">vapply</span>(samples, median, <span class="at">FUN.VALUE =</span> <span class="fu">numeric</span>(1L))</span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a>medians</span>
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a><span class="co">#> [1] 0.6017626 3.4411470 5.2946304 4.4694671</span></span>
<span id="cb13-5"><a href="#cb13-5" tabindex="-1"></a></span>
<span id="cb13-6"><a href="#cb13-6" tabindex="-1"></a><span class="co"># not type stable</span></span>
<span id="cb13-7"><a href="#cb13-7" tabindex="-1"></a>medians <span class="ot"><-</span> <span class="fu">sapply</span>(samples, median)</span></code></pre></div></li>
<li><p>purrr is a little more compact because we can use
<code>map_dbl()</code>.</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>medians <span class="ot"><-</span> <span class="fu">map_dbl</span>(samples, median)</span>
<span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a>medians</span>
<span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a><span class="co">#> [1] 0.6017626 3.4411470 5.2946304 4.4694671</span></span></code></pre></div></li>
</ul>
<p>What if we want just the side effect, such as a plot or a file
output, but not the returned values?</p>
<ul>
<li><p>In base R we can either use a for loop or hide the results of
<code>lapply</code>.</p>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a><span class="co"># for loop</span></span>
<span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a><span class="cf">for</span> (s <span class="cf">in</span> samples) {</span>
<span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a> <span class="fu">hist</span>(s, <span class="at">xlab =</span> <span class="st">"value"</span>, <span class="at">main =</span> <span class="st">""</span>)</span>
<span id="cb15-4"><a href="#cb15-4" tabindex="-1"></a>}</span>
<span id="cb15-5"><a href="#cb15-5" tabindex="-1"></a></span>
<span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a><span class="co"># lapply</span></span>
<span id="cb15-7"><a href="#cb15-7" tabindex="-1"></a><span class="fu">invisible</span>(<span class="fu">lapply</span>(samples, <span class="cf">function</span>(s) {</span>
<span id="cb15-8"><a href="#cb15-8" tabindex="-1"></a> <span class="fu">hist</span>(s, <span class="at">xlab =</span> <span class="st">"value"</span>, <span class="at">main =</span> <span class="st">""</span>)</span>
<span id="cb15-9"><a href="#cb15-9" tabindex="-1"></a>}))</span></code></pre></div></li>
<li><p>In purrr, we can use <code>walk()</code>.</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a><span class="fu">walk</span>(samples, <span class="sc">~</span> <span class="fu">hist</span>(.x, <span class="at">xlab =</span> <span class="st">"value"</span>, <span class="at">main =</span> <span class="st">""</span>))</span></code></pre></div></li>
</ul>
</div>
<div id="pipes" class="section level3">
<h3>Pipes</h3>
<p>You can join multiple steps together either using the magrittr
pipe:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a>means <span class="sc">%>%</span></span>
<span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a> <span class="fu">map</span>(rnorm, <span class="at">n =</span> <span class="dv">5</span>, <span class="at">sd =</span> <span class="dv">1</span>) <span class="sc">%>%</span></span>
<span id="cb17-4"><a href="#cb17-4" tabindex="-1"></a> <span class="fu">map_dbl</span>(median)</span>
<span id="cb17-5"><a href="#cb17-5" tabindex="-1"></a><span class="co">#> [1] -0.09802317 2.72057350 2.87673977 4.05830349</span></span></code></pre></div>
<p>Or the base R pipe:</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2020</span>)</span>
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a>means <span class="sc">|></span> </span>
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a> <span class="fu">lapply</span>(rnorm, <span class="at">n =</span> <span class="dv">5</span>, <span class="at">sd =</span> <span class="dv">1</span>) <span class="sc">|></span> </span>
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a> <span class="fu">sapply</span>(median)</span>
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a><span class="co">#> [1] -0.09802317 2.72057350 2.87673977 4.05830349</span></span></code></pre></div>
<p>(And of course you can mix and match the piping style with either
base R or purrr.)</p>
<p>The pipe is particularly compelling when working with longer
transformations. For example, the following code splits
<code>mtcars</code> up by <code>cyl</code>, fits a linear model,
extracts the coefficients, and extracts the first one (the
intercept).</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a>mtcars <span class="sc">%>%</span> </span>
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a> <span class="fu">split</span>(mtcars<span class="sc">$</span>cyl) <span class="sc">%>%</span> </span>
<span id="cb19-3"><a href="#cb19-3" tabindex="-1"></a> <span class="fu">map</span>(\(df) <span class="fu">lm</span>(mpg <span class="sc">~</span> wt, <span class="at">data =</span> df)) <span class="sc">%>%</span> </span>
<span id="cb19-4"><a href="#cb19-4" tabindex="-1"></a> <span class="fu">map</span>(coef) <span class="sc">%>%</span> </span>
<span id="cb19-5"><a href="#cb19-5" tabindex="-1"></a> <span class="fu">map_dbl</span>(<span class="dv">1</span>)</span>
<span id="cb19-6"><a href="#cb19-6" tabindex="-1"></a><span class="co">#> 4 6 8 </span></span>
<span id="cb19-7"><a href="#cb19-7" tabindex="-1"></a><span class="co">#> 39.57120 28.40884 23.86803</span></span></code></pre></div>
</div>
</div>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Build a script element pointing at the hosted MathJax bundle and attach it
// to the first head element, so the library is requested at page load rather
// than being referenced by a static script tag in the markup.
(function (doc) {
  var headEl = doc.getElementsByTagName("head")[0];
  var loader = doc.createElement("script");
  loader.type = "text/javascript";
  loader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  headEl.appendChild(loader);
})(document);
</script>
</body>
</html>