skimr
Version:
CLI EDA for CSVs
720 lines (691 loc) • 59.3 kB
HTML
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Grouped data</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  // Candidate nodes: the first child of every pandoc section wrapper
  // (a div carrying both "section" and some "level…" class).
  var candidates = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var headingTag = /^h[1-6]$/i;
  for (var idx = 0; idx < candidates.length; idx++) {
    var node = candidates[idx];
    // Only h1-h6 elements are stripped; any other first child is left alone.
    if (headingTag.test(node.tagName)) {
      // `attributes` is a live collection, so keep removing the first entry
      // until none remain.
      var attrs = node.attributes;
      while (attrs.length > 0) {
        node.removeAttribute(attrs[0].name);
      }
    }
  }
});
</script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Syntax-highlighting colours, one rule per highlighter token class.
   The trailing labels are the token-type names used in pandoc's stock
   highlighting CSS — verify against the pandoc version that built this page. */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  var styleSheets = document.styleSheets;
  for (var s = 0; s < styleSheets.length; s++) {
    var sheet = styleSheets[s];
    // Only the sheet whose owner element is tagged data-origin="pandoc" is rewritten.
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;
    var ruleList;
    // Reading cssRules can throw; such sheets are skipped.
    try { ruleList = sheet.cssRules; } catch (e) { continue; }
    var pos = 0;
    while (pos < ruleList.length) {
      var current = ruleList[pos];
      // Candidate: a plain style rule whose selector is exactly "div.sourceCode" ...
      var isDivRule = current.type === current.STYLE_RULE &&
        current.selectorText === "div.sourceCode";
      // ... that sets a text colour and/or a background colour.
      var isColoured = isDivRule &&
        !(current.style.color === '' && current.style.backgroundColor === '');
      if (!isColoured) {
        pos++;
        continue;
      }
      // Swap the rule in place for an equivalent pre.sourceCode rule. `pos` is
      // deliberately not advanced: the next pass sees the inserted rule, which
      // fails the selector test above and moves the cursor on.
      var declarations = current.style.cssText;
      sheet.deleteRule(pos);
      sheet.insertRule('pre.sourceCode{' + declarations + '}', pos);
    }
  }
})();
</script>
<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule drawn as a single 1px line: `border: none` clears the
   borders on every side, then only the top edge is re-enabled. The former
   leading `border-style: solid` was dead — the shorthand reset it at once. */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Framed images: white backing, 2px inner padding, thin rounded grey border.
   The border was declared twice (#DDDDDD, then #CCCCCC); only the later
   declaration ever applied, so it is the one kept. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Grouped data</h1>
<p>dplyr verbs are particularly powerful when you apply them to grouped
data frames (<code>grouped_df</code> objects). This vignette shows
you:</p>
<ul>
<li><p>How to group, inspect, and ungroup with <code>group_by()</code>
and friends.</p></li>
<li><p>How individual dplyr verbs change their behaviour when applied
to a grouped data frame.</p></li>
<li><p>How to access data about the “current” group from within a
verb.</p></li>
</ul>
<p>We’ll start by loading dplyr:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span></code></pre></div>
<div id="group_by" class="section level2">
<h2><code>group_by()</code></h2>
<p>The most important grouping verb is <code>group_by()</code>: it takes
a data frame and one or more variables to group by:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>by_species <span class="ot">&lt;-</span> starwars <span class="sc">%&gt;%</span> <span class="fu">group_by</span>(species)</span>
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a>by_sex_gender <span class="ot">&lt;-</span> starwars <span class="sc">%&gt;%</span> <span class="fu">group_by</span>(sex, gender)</span></code></pre></div>
<p>You can see the grouping when you print the data:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>by_species</span>
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 14</span></span>
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a><span class="co">#> # Groups: species [38]</span></span>
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span>
<span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; </span></span>
<span id="cb3-6"><a href="#cb3-6" tabindex="-1"></a><span class="co">#&gt; 1 Luke Sky… 172 77 blond fair blue 19 male mascu…</span></span>
<span id="cb3-7"><a href="#cb3-7" tabindex="-1"></a><span class="co">#&gt; 2 C-3PO 167 75 &lt;NA&gt; gold yellow 112 none mascu…</span></span>
<span id="cb3-8"><a href="#cb3-8" tabindex="-1"></a><span class="co">#&gt; 3 R2-D2 96 32 &lt;NA&gt; white, bl… red 33 none mascu…</span></span>
<span id="cb3-9"><a href="#cb3-9" tabindex="-1"></a><span class="co">#&gt; 4 Darth Va… 202 136 none white yellow 41.9 male mascu…</span></span>
<span id="cb3-10"><a href="#cb3-10" tabindex="-1"></a><span class="co">#&gt; # ℹ 83 more rows</span></span>
<span id="cb3-11"><a href="#cb3-11" tabindex="-1"></a><span class="co">#&gt; # ℹ 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&gt;,</span></span>
<span id="cb3-12"><a href="#cb3-12" tabindex="-1"></a><span class="co">#&gt; # vehicles &lt;list&gt;, starships &lt;list&gt;</span></span>
<span id="cb3-13"><a href="#cb3-13" tabindex="-1"></a>by_sex_gender</span>
<span id="cb3-14"><a href="#cb3-14" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 14</span></span>
<span id="cb3-15"><a href="#cb3-15" tabindex="-1"></a><span class="co">#> # Groups: sex, gender [6]</span></span>
<span id="cb3-16"><a href="#cb3-16" tabindex="-1"></a><span class="co">#> name height mass hair_color skin_color eye_color birth_year sex gender</span></span>
<span id="cb3-17"><a href="#cb3-17" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;chr&gt; &lt;chr&gt; </span></span>
<span id="cb3-18"><a href="#cb3-18" tabindex="-1"></a><span class="co">#&gt; 1 Luke Sky… 172 77 blond fair blue 19 male mascu…</span></span>
<span id="cb3-19"><a href="#cb3-19" tabindex="-1"></a><span class="co">#&gt; 2 C-3PO 167 75 &lt;NA&gt; gold yellow 112 none mascu…</span></span>
<span id="cb3-20"><a href="#cb3-20" tabindex="-1"></a><span class="co">#&gt; 3 R2-D2 96 32 &lt;NA&gt; white, bl… red 33 none mascu…</span></span>
<span id="cb3-21"><a href="#cb3-21" tabindex="-1"></a><span class="co">#&gt; 4 Darth Va… 202 136 none white yellow 41.9 male mascu…</span></span>
<span id="cb3-22"><a href="#cb3-22" tabindex="-1"></a><span class="co">#&gt; # ℹ 83 more rows</span></span>
<span id="cb3-23"><a href="#cb3-23" tabindex="-1"></a><span class="co">#&gt; # ℹ 5 more variables: homeworld &lt;chr&gt;, species &lt;chr&gt;, films &lt;list&gt;,</span></span>
<span id="cb3-24"><a href="#cb3-24" tabindex="-1"></a><span class="co">#&gt; # vehicles &lt;list&gt;, starships &lt;list&gt;</span></span></code></pre></div>
<p>Or use <code>tally()</code> to count the number of rows in each
group. The <code>sort</code> argument is useful if you want to see the
largest groups up front.</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">tally</span>()</span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a><span class="co">#> # A tibble: 38 × 2</span></span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a><span class="co">#> species n</span></span>
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt;</span></span>
<span id="cb4-5"><a href="#cb4-5" tabindex="-1"></a><span class="co">#> 1 Aleena 1</span></span>
<span id="cb4-6"><a href="#cb4-6" tabindex="-1"></a><span class="co">#> 2 Besalisk 1</span></span>
<span id="cb4-7"><a href="#cb4-7" tabindex="-1"></a><span class="co">#> 3 Cerean 1</span></span>
<span id="cb4-8"><a href="#cb4-8" tabindex="-1"></a><span class="co">#> 4 Chagrian 1</span></span>
<span id="cb4-9"><a href="#cb4-9" tabindex="-1"></a><span class="co">#> # ℹ 34 more rows</span></span>
<span id="cb4-10"><a href="#cb4-10" tabindex="-1"></a></span>
<span id="cb4-11"><a href="#cb4-11" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> <span class="fu">tally</span>(<span class="at">sort =</span> <span class="cn">TRUE</span>)</span>
<span id="cb4-12"><a href="#cb4-12" tabindex="-1"></a><span class="co">#> # A tibble: 6 × 3</span></span>
<span id="cb4-13"><a href="#cb4-13" tabindex="-1"></a><span class="co">#> # Groups: sex [5]</span></span>
<span id="cb4-14"><a href="#cb4-14" tabindex="-1"></a><span class="co">#> sex gender n</span></span>
<span id="cb4-15"><a href="#cb4-15" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; &lt;int&gt;</span></span>
<span id="cb4-16"><a href="#cb4-16" tabindex="-1"></a><span class="co">#> 1 male masculine 60</span></span>
<span id="cb4-17"><a href="#cb4-17" tabindex="-1"></a><span class="co">#> 2 female feminine 16</span></span>
<span id="cb4-18"><a href="#cb4-18" tabindex="-1"></a><span class="co">#> 3 none masculine 5</span></span>
<span id="cb4-19"><a href="#cb4-19" tabindex="-1"></a><span class="co">#&gt; 4 &lt;NA&gt; &lt;NA&gt; 4</span></span>
<span id="cb4-20"><a href="#cb4-20" tabindex="-1"></a><span class="co">#> # ℹ 2 more rows</span></span></code></pre></div>
<p>As well as grouping by existing variables, you can group by any
function of existing variables. This is equivalent to performing a
<code>mutate()</code> <strong>before</strong> the
<code>group_by()</code>:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>bmi_breaks <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">18.5</span>, <span class="dv">25</span>, <span class="dv">30</span>, <span class="cn">Inf</span>)</span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a></span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a>starwars <span class="sc">%>%</span></span>
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a> <span class="fu">group_by</span>(<span class="at">bmi_cat =</span> <span class="fu">cut</span>(mass<span class="sc">/</span>(height<span class="sc">/</span><span class="dv">100</span>)<span class="sc">^</span><span class="dv">2</span>, <span class="at">breaks=</span>bmi_breaks)) <span class="sc">%>%</span></span>
<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a> <span class="fu">tally</span>()</span>
<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a><span class="co">#> # A tibble: 5 × 2</span></span>
<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a><span class="co">#> bmi_cat n</span></span>
<span id="cb5-8"><a href="#cb5-8" tabindex="-1"></a><span class="co">#&gt; &lt;fct&gt; &lt;int&gt;</span></span>
<span id="cb5-9"><a href="#cb5-9" tabindex="-1"></a><span class="co">#> 1 (0,18.5] 10</span></span>
<span id="cb5-10"><a href="#cb5-10" tabindex="-1"></a><span class="co">#> 2 (18.5,25] 24</span></span>
<span id="cb5-11"><a href="#cb5-11" tabindex="-1"></a><span class="co">#> 3 (25,30] 13</span></span>
<span id="cb5-12"><a href="#cb5-12" tabindex="-1"></a><span class="co">#> 4 (30,Inf] 12</span></span>
<span id="cb5-13"><a href="#cb5-13" tabindex="-1"></a><span class="co">#> # ℹ 1 more row</span></span></code></pre></div>
</div>
<div id="group-metadata" class="section level2">
<h2>Group metadata</h2>
<p>You can see underlying group data with <code>group_keys()</code>. It
has one row for each group and one column for each grouping
variable:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">group_keys</span>()</span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a><span class="co">#> # A tibble: 38 × 1</span></span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a><span class="co">#> species </span></span>
<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; </span></span>
<span id="cb6-5"><a href="#cb6-5" tabindex="-1"></a><span class="co">#> 1 Aleena </span></span>
<span id="cb6-6"><a href="#cb6-6" tabindex="-1"></a><span class="co">#> 2 Besalisk</span></span>
<span id="cb6-7"><a href="#cb6-7" tabindex="-1"></a><span class="co">#> 3 Cerean </span></span>
<span id="cb6-8"><a href="#cb6-8" tabindex="-1"></a><span class="co">#> 4 Chagrian</span></span>
<span id="cb6-9"><a href="#cb6-9" tabindex="-1"></a><span class="co">#> # ℹ 34 more rows</span></span>
<span id="cb6-10"><a href="#cb6-10" tabindex="-1"></a></span>
<span id="cb6-11"><a href="#cb6-11" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> <span class="fu">group_keys</span>()</span>
<span id="cb6-12"><a href="#cb6-12" tabindex="-1"></a><span class="co">#> # A tibble: 6 × 2</span></span>
<span id="cb6-13"><a href="#cb6-13" tabindex="-1"></a><span class="co">#> sex gender </span></span>
<span id="cb6-14"><a href="#cb6-14" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; </span></span>
<span id="cb6-15"><a href="#cb6-15" tabindex="-1"></a><span class="co">#> 1 female feminine </span></span>
<span id="cb6-16"><a href="#cb6-16" tabindex="-1"></a><span class="co">#> 2 hermaphroditic masculine</span></span>
<span id="cb6-17"><a href="#cb6-17" tabindex="-1"></a><span class="co">#> 3 male masculine</span></span>
<span id="cb6-18"><a href="#cb6-18" tabindex="-1"></a><span class="co">#> 4 none feminine </span></span>
<span id="cb6-19"><a href="#cb6-19" tabindex="-1"></a><span class="co">#> # ℹ 2 more rows</span></span></code></pre></div>
<p>You can see which group each row belongs to with
<code>group_indices()</code>:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">group_indices</span>()</span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a><span class="co">#> [1] 11 6 6 11 11 11 11 6 11 11 11 11 34 11 24 12 11 11 36 11 11 6 31 11 11</span></span>
<span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a><span class="co">#> [26] 18 11 11 8 26 11 21 11 10 10 10 38 30 7 38 11 37 32 32 33 35 29 11 3 20</span></span>
<span id="cb7-4"><a href="#cb7-4" tabindex="-1"></a><span class="co">#> [51] 37 27 13 23 16 4 11 11 11 9 17 17 11 11 11 11 5 2 15 15 11 1 6 25 19</span></span>
<span id="cb7-5"><a href="#cb7-5" tabindex="-1"></a><span class="co">#> [76] 28 14 34 11 38 22 11 11 11 6 38 11</span></span></code></pre></div>
<p>And which rows each group contains with
<code>group_rows()</code>:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">group_rows</span>() <span class="sc">%>%</span> <span class="fu">head</span>()</span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a><span class="co">#&gt; &lt;list_of&lt;integer&gt;[6]&gt;</span></span>
<span id="cb8-3"><a href="#cb8-3" tabindex="-1"></a><span class="co">#> [[1]]</span></span>
<span id="cb8-4"><a href="#cb8-4" tabindex="-1"></a><span class="co">#> [1] 72</span></span>
<span id="cb8-5"><a href="#cb8-5" tabindex="-1"></a><span class="co">#> </span></span>
<span id="cb8-6"><a href="#cb8-6" tabindex="-1"></a><span class="co">#> [[2]]</span></span>
<span id="cb8-7"><a href="#cb8-7" tabindex="-1"></a><span class="co">#> [1] 68</span></span>
<span id="cb8-8"><a href="#cb8-8" tabindex="-1"></a><span class="co">#> </span></span>
<span id="cb8-9"><a href="#cb8-9" tabindex="-1"></a><span class="co">#> [[3]]</span></span>
<span id="cb8-10"><a href="#cb8-10" tabindex="-1"></a><span class="co">#> [1] 49</span></span>
<span id="cb8-11"><a href="#cb8-11" tabindex="-1"></a><span class="co">#> </span></span>
<span id="cb8-12"><a href="#cb8-12" tabindex="-1"></a><span class="co">#> [[4]]</span></span>
<span id="cb8-13"><a href="#cb8-13" tabindex="-1"></a><span class="co">#> [1] 56</span></span>
<span id="cb8-14"><a href="#cb8-14" tabindex="-1"></a><span class="co">#> </span></span>
<span id="cb8-15"><a href="#cb8-15" tabindex="-1"></a><span class="co">#> [[5]]</span></span>
<span id="cb8-16"><a href="#cb8-16" tabindex="-1"></a><span class="co">#> [1] 67</span></span>
<span id="cb8-17"><a href="#cb8-17" tabindex="-1"></a><span class="co">#> </span></span>
<span id="cb8-18"><a href="#cb8-18" tabindex="-1"></a><span class="co">#> [[6]]</span></span>
<span id="cb8-19"><a href="#cb8-19" tabindex="-1"></a><span class="co">#> [1] 2 3 8 22 73 85</span></span></code></pre></div>
<p>Use <code>group_vars()</code> if you just want the names of the
grouping variables:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">group_vars</span>()</span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a><span class="co">#> [1] "species"</span></span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> <span class="fu">group_vars</span>()</span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a><span class="co">#> [1] "sex" "gender"</span></span></code></pre></div>
<div id="changing-and-adding-to-grouping-variables" class="section level3">
<h3>Changing and adding to grouping variables</h3>
<p>If you apply <code>group_by()</code> to an already grouped dataset,
it will overwrite the existing grouping variables. For example, the
following code groups by <code>homeworld</code> instead of
<code>species</code>:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a> <span class="fu">group_by</span>(homeworld) <span class="sc">%>%</span></span>
<span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a> <span class="fu">tally</span>()</span>
<span id="cb10-4"><a href="#cb10-4" tabindex="-1"></a><span class="co">#> # A tibble: 49 × 2</span></span>
<span id="cb10-5"><a href="#cb10-5" tabindex="-1"></a><span class="co">#> homeworld n</span></span>
<span id="cb10-6"><a href="#cb10-6" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt;</span></span>
<span id="cb10-7"><a href="#cb10-7" tabindex="-1"></a><span class="co">#> 1 Alderaan 3</span></span>
<span id="cb10-8"><a href="#cb10-8" tabindex="-1"></a><span class="co">#> 2 Aleen Minor 1</span></span>
<span id="cb10-9"><a href="#cb10-9" tabindex="-1"></a><span class="co">#> 3 Bespin 1</span></span>
<span id="cb10-10"><a href="#cb10-10" tabindex="-1"></a><span class="co">#> 4 Bestine IV 1</span></span>
<span id="cb10-11"><a href="#cb10-11" tabindex="-1"></a><span class="co">#> # ℹ 45 more rows</span></span></code></pre></div>
<p>To <strong>augment</strong> the grouping, use
<code>.add = TRUE</code><a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>. For example, the following code groups by
species and homeworld:</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a> <span class="fu">group_by</span>(homeworld, <span class="at">.add =</span> <span class="cn">TRUE</span>) <span class="sc">%>%</span></span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a> <span class="fu">tally</span>()</span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="co">#> # A tibble: 58 × 3</span></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="co">#> # Groups: species [38]</span></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="co">#> species homeworld n</span></span>
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; &lt;int&gt;</span></span>
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co">#> 1 Aleena Aleen Minor 1</span></span>
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a><span class="co">#> 2 Besalisk Ojom 1</span></span>
<span id="cb11-10"><a href="#cb11-10" tabindex="-1"></a><span class="co">#> 3 Cerean Cerea 1</span></span>
<span id="cb11-11"><a href="#cb11-11" tabindex="-1"></a><span class="co">#> 4 Chagrian Champala 1</span></span>
<span id="cb11-12"><a href="#cb11-12" tabindex="-1"></a><span class="co">#> # ℹ 54 more rows</span></span></code></pre></div>
</div>
<div id="removing-grouping-variables" class="section level3">
<h3>Removing grouping variables</h3>
<p>To remove all grouping variables, use <code>ungroup()</code>:</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">%>%</span></span>
<span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a> <span class="fu">tally</span>()</span>
<span id="cb12-4"><a href="#cb12-4" tabindex="-1"></a><span class="co">#> # A tibble: 1 × 1</span></span>
<span id="cb12-5"><a href="#cb12-5" tabindex="-1"></a><span class="co">#> n</span></span>
<span id="cb12-6"><a href="#cb12-6" tabindex="-1"></a><span class="co">#&gt; &lt;int&gt;</span></span>
<span id="cb12-7"><a href="#cb12-7" tabindex="-1"></a><span class="co">#> 1 87</span></span></code></pre></div>
<p>You can also choose to selectively ungroup by listing the variables
you want to remove:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> </span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a> <span class="fu">ungroup</span>(sex) <span class="sc">%>%</span> </span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a> <span class="fu">tally</span>()</span>
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a><span class="co">#> # A tibble: 3 × 2</span></span>
<span id="cb13-5"><a href="#cb13-5" tabindex="-1"></a><span class="co">#> gender n</span></span>
<span id="cb13-6"><a href="#cb13-6" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt;</span></span>
<span id="cb13-7"><a href="#cb13-7" tabindex="-1"></a><span class="co">#> 1 feminine 17</span></span>
<span id="cb13-8"><a href="#cb13-8" tabindex="-1"></a><span class="co">#> 2 masculine 66</span></span>
<span id="cb13-9"><a href="#cb13-9" tabindex="-1"></a><span class="co">#&gt; 3 &lt;NA&gt; 4</span></span></code></pre></div>
</div>
</div>
<div id="verbs" class="section level2">
<h2>Verbs</h2>
<p>The following sections describe how grouping affects the main dplyr
verbs.</p>
<div id="summarise" class="section level3">
<h3><code>summarise()</code></h3>
<p><code>summarise()</code> computes a summary for each group. This
means that it starts from <code>group_keys()</code>, adding summary
variables to the right hand side:</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a> <span class="at">n =</span> <span class="fu">n</span>(),</span>
<span id="cb14-4"><a href="#cb14-4" tabindex="-1"></a> <span class="at">height =</span> <span class="fu">mean</span>(height, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
<span id="cb14-5"><a href="#cb14-5" tabindex="-1"></a> )</span>
<span id="cb14-6"><a href="#cb14-6" tabindex="-1"></a><span class="co">#> # A tibble: 38 × 3</span></span>
<span id="cb14-7"><a href="#cb14-7" tabindex="-1"></a><span class="co">#> species n height</span></span>
<span id="cb14-8"><a href="#cb14-8" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;int&gt; &lt;dbl&gt;</span></span>
<span id="cb14-9"><a href="#cb14-9" tabindex="-1"></a><span class="co">#> 1 Aleena 1 79</span></span>
<span id="cb14-10"><a href="#cb14-10" tabindex="-1"></a><span class="co">#> 2 Besalisk 1 198</span></span>
<span id="cb14-11"><a href="#cb14-11" tabindex="-1"></a><span class="co">#> 3 Cerean 1 198</span></span>
<span id="cb14-12"><a href="#cb14-12" tabindex="-1"></a><span class="co">#> 4 Chagrian 1 196</span></span>
<span id="cb14-13"><a href="#cb14-13" tabindex="-1"></a><span class="co">#> # ℹ 34 more rows</span></span></code></pre></div>
<p>The <code>.groups=</code> argument controls the grouping structure of
the output. The historical behaviour of removing the right hand side
grouping variable corresponds to <code>.groups = "drop_last"</code>
without a message or <code>.groups = NULL</code> with a message (the
default).</p>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> </span>
<span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>()) <span class="sc">%>%</span> </span>
<span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a> <span class="fu">group_vars</span>()</span>
<span id="cb15-4"><a href="#cb15-4" tabindex="-1"></a><span class="co">#> `summarise()` has grouped output by 'sex'. You can override using the `.groups`</span></span>
<span id="cb15-5"><a href="#cb15-5" tabindex="-1"></a><span class="co">#> argument.</span></span>
<span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a><span class="co">#> [1] "sex"</span></span>
<span id="cb15-7"><a href="#cb15-7" tabindex="-1"></a></span>
<span id="cb15-8"><a href="#cb15-8" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> </span>
<span id="cb15-9"><a href="#cb15-9" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>(), <span class="at">.groups =</span> <span class="st">"drop_last"</span>) <span class="sc">%>%</span> </span>
<span id="cb15-10"><a href="#cb15-10" tabindex="-1"></a> <span class="fu">group_vars</span>()</span>
<span id="cb15-11"><a href="#cb15-11" tabindex="-1"></a><span class="co">#> [1] "sex"</span></span></code></pre></div>
<p>Since version 1.0.0 the groups may also be kept
(<code>.groups = "keep"</code>) or dropped
(<code>.groups = "drop"</code>).</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> </span>
<span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>(), <span class="at">.groups =</span> <span class="st">"keep"</span>) <span class="sc">%>%</span> </span>
<span id="cb16-3"><a href="#cb16-3" tabindex="-1"></a> <span class="fu">group_vars</span>()</span>
<span id="cb16-4"><a href="#cb16-4" tabindex="-1"></a><span class="co">#> [1] "sex" "gender"</span></span>
<span id="cb16-5"><a href="#cb16-5" tabindex="-1"></a></span>
<span id="cb16-6"><a href="#cb16-6" tabindex="-1"></a>by_sex_gender <span class="sc">%>%</span> </span>
<span id="cb16-7"><a href="#cb16-7" tabindex="-1"></a> <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>(), <span class="at">.groups =</span> <span class="st">"drop"</span>) <span class="sc">%>%</span> </span>
<span id="cb16-8"><a href="#cb16-8" tabindex="-1"></a> <span class="fu">group_vars</span>()</span>
<span id="cb16-9"><a href="#cb16-9" tabindex="-1"></a><span class="co">#> character(0)</span></span></code></pre></div>
<p>When the output no longer has grouping variables, it becomes
ungrouped (i.e. a regular tibble).</p>
</div>
<div id="select-rename-and-relocate" class="section level3">
<h3><code>select()</code>, <code>rename()</code>, and
<code>relocate()</code></h3>
<p><code>rename()</code> and <code>relocate()</code> behave identically
with grouped and ungrouped data because they only affect the name or
position of existing columns. Grouped <code>select()</code> is almost
identical to ungrouped <code>select()</code>, except that it always includes the
grouping variables:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a>by_species <span class="sc">%>%</span> <span class="fu">select</span>(mass)</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a><span class="co">#> Adding missing grouping variables: `species`</span></span>
<span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 2</span></span>
<span id="cb17-4"><a href="#cb17-4" tabindex="-1"></a><span class="co">#> # Groups: species [38]</span></span>
<span id="cb17-5"><a href="#cb17-5" tabindex="-1"></a><span class="co">#> species mass</span></span>
<span id="cb17-6"><a href="#cb17-6" tabindex="-1"></a><span class="co">#> <chr> <dbl></span></span>
<span id="cb17-7"><a href="#cb17-7" tabindex="-1"></a><span class="co">#> 1 Human 77</span></span>
<span id="cb17-8"><a href="#cb17-8" tabindex="-1"></a><span class="co">#> 2 Droid 75</span></span>
<span id="cb17-9"><a href="#cb17-9" tabindex="-1"></a><span class="co">#> 3 Droid 32</span></span>
<span id="cb17-10"><a href="#cb17-10" tabindex="-1"></a><span class="co">#> 4 Human 136</span></span>
<span id="cb17-11"><a href="#cb17-11" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span></code></pre></div>
<p>If you don’t want the grouping variables, you’ll have to first
<code>ungroup()</code>. (This design is possibly a mistake, but we’re
stuck with it for now.)</p>
</div>
<div id="arrange" class="section level3">
<h3><code>arrange()</code></h3>
<p>Grouped <code>arrange()</code> is the same as ungrouped
<code>arrange()</code>, unless you set <code>.by_group = TRUE</code>, in
which case it will order first by the grouping variables.</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a> <span class="fu">arrange</span>(<span class="fu">desc</span>(mass)) <span class="sc">%>%</span></span>
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a> <span class="fu">relocate</span>(species, mass)</span>
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 14</span></span>
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a><span class="co">#> # Groups: species [38]</span></span>
<span id="cb18-6"><a href="#cb18-6" tabindex="-1"></a><span class="co">#> species mass name height hair_color skin_color eye_color birth_year sex </span></span>
<span id="cb18-7"><a href="#cb18-7" tabindex="-1"></a><span class="co">#> <chr> <dbl> <chr> <int> <chr> <chr> <chr> <dbl> <chr></span></span>
<span id="cb18-8"><a href="#cb18-8" tabindex="-1"></a><span class="co">#> 1 Hutt 1358 Jabba D… 175 <NA> green-tan… orange 600 herm…</span></span>
<span id="cb18-9"><a href="#cb18-9" tabindex="-1"></a><span class="co">#> 2 Kaleesh 159 Grievous 216 none brown, wh… green, y… NA male </span></span>
<span id="cb18-10"><a href="#cb18-10" tabindex="-1"></a><span class="co">#> 3 Droid 140 IG-88 200 none metal red 15 none </span></span>
<span id="cb18-11"><a href="#cb18-11" tabindex="-1"></a><span class="co">#> 4 Human 136 Darth V… 202 none white yellow 41.9 male </span></span>
<span id="cb18-12"><a href="#cb18-12" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span>
<span id="cb18-13"><a href="#cb18-13" tabindex="-1"></a><span class="co">#> # ℹ 5 more variables: gender <chr>, homeworld <chr>, films <list>,</span></span>
<span id="cb18-14"><a href="#cb18-14" tabindex="-1"></a><span class="co">#> # vehicles <list>, starships <list></span></span>
<span id="cb18-15"><a href="#cb18-15" tabindex="-1"></a></span>
<span id="cb18-16"><a href="#cb18-16" tabindex="-1"></a>by_species <span class="sc">%>%</span></span>
<span id="cb18-17"><a href="#cb18-17" tabindex="-1"></a> <span class="fu">arrange</span>(<span class="fu">desc</span>(mass), <span class="at">.by_group =</span> <span class="cn">TRUE</span>) <span class="sc">%>%</span></span>
<span id="cb18-18"><a href="#cb18-18" tabindex="-1"></a> <span class="fu">relocate</span>(species, mass)</span>
<span id="cb18-19"><a href="#cb18-19" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 14</span></span>
<span id="cb18-20"><a href="#cb18-20" tabindex="-1"></a><span class="co">#> # Groups: species [38]</span></span>
<span id="cb18-21"><a href="#cb18-21" tabindex="-1"></a><span class="co">#> species mass name height hair_color skin_color eye_color birth_year sex </span></span>
<span id="cb18-22"><a href="#cb18-22" tabindex="-1"></a><span class="co">#> <chr> <dbl> <chr> <int> <chr> <chr> <chr> <dbl> <chr></span></span>
<span id="cb18-23"><a href="#cb18-23" tabindex="-1"></a><span class="co">#> 1 Aleena 15 Ratts … 79 none grey, blue unknown NA male </span></span>
<span id="cb18-24"><a href="#cb18-24" tabindex="-1"></a><span class="co">#> 2 Besalisk 102 Dexter… 198 none brown yellow NA male </span></span>
<span id="cb18-25"><a href="#cb18-25" tabindex="-1"></a><span class="co">#> 3 Cerean 82 Ki-Adi… 198 white pale yellow 92 male </span></span>
<span id="cb18-26"><a href="#cb18-26" tabindex="-1"></a><span class="co">#> 4 Chagrian NA Mas Am… 196 none blue blue NA male </span></span>
<span id="cb18-27"><a href="#cb18-27" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span>
<span id="cb18-28"><a href="#cb18-28" tabindex="-1"></a><span class="co">#> # ℹ 5 more variables: gender <chr>, homeworld <chr>, films <list>,</span></span>
<span id="cb18-29"><a href="#cb18-29" tabindex="-1"></a><span class="co">#> # vehicles <list>, starships <list></span></span></code></pre></div>
<p>Note that the second example is sorted by <code>species</code> (from the
<code>group_by()</code> statement) and then by descending <code>mass</code>
(within species).</p>
</div>
<div id="mutate" class="section level3">
<h3><code>mutate()</code></h3>
<p>In simple cases with vectorised functions, grouped and ungrouped
<code>mutate()</code> give the same results. They differ when used with
summary functions:</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a><span class="co"># Subtract off global mean</span></span>
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a>starwars <span class="sc">%>%</span> </span>
<span id="cb19-3"><a href="#cb19-3" tabindex="-1"></a> <span class="fu">select</span>(name, homeworld, mass) <span class="sc">%>%</span> </span>
<span id="cb19-4"><a href="#cb19-4" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">standard_mass =</span> mass <span class="sc">-</span> <span class="fu">mean</span>(mass, <span class="at">na.rm =</span> <span class="cn">TRUE</span>))</span>
<span id="cb19-5"><a href="#cb19-5" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 4</span></span>
<span id="cb19-6"><a href="#cb19-6" tabindex="-1"></a><span class="co">#> name homeworld mass standard_mass</span></span>
<span id="cb19-7"><a href="#cb19-7" tabindex="-1"></a><span class="co">#> <chr> <chr> <dbl> <dbl></span></span>
<span id="cb19-8"><a href="#cb19-8" tabindex="-1"></a><span class="co">#> 1 Luke Skywalker Tatooine 77 -20.3</span></span>
<span id="cb19-9"><a href="#cb19-9" tabindex="-1"></a><span class="co">#> 2 C-3PO Tatooine 75 -22.3</span></span>
<span id="cb19-10"><a href="#cb19-10" tabindex="-1"></a><span class="co">#> 3 R2-D2 Naboo 32 -65.3</span></span>
<span id="cb19-11"><a href="#cb19-11" tabindex="-1"></a><span class="co">#> 4 Darth Vader Tatooine 136 38.7</span></span>
<span id="cb19-12"><a href="#cb19-12" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span>
<span id="cb19-13"><a href="#cb19-13" tabindex="-1"></a></span>
<span id="cb19-14"><a href="#cb19-14" tabindex="-1"></a><span class="co"># Subtract off homeworld mean</span></span>
<span id="cb19-15"><a href="#cb19-15" tabindex="-1"></a>starwars <span class="sc">%>%</span> </span>
<span id="cb19-16"><a href="#cb19-16" tabindex="-1"></a> <span class="fu">select</span>(name, homeworld, mass) <span class="sc">%>%</span> </span>
<span id="cb19-17"><a href="#cb19-17" tabindex="-1"></a> <span class="fu">group_by</span>(homeworld) <span class="sc">%>%</span> </span>
<span id="cb19-18"><a href="#cb19-18" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">standard_mass =</span> mass <span class="sc">-</span> <span class="fu">mean</span>(mass, <span class="at">na.rm =</span> <span class="cn">TRUE</span>))</span>
<span id="cb19-19"><a href="#cb19-19" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 4</span></span>
<span id="cb19-20"><a href="#cb19-20" tabindex="-1"></a><span class="co">#> # Groups: homeworld [49]</span></span>
<span id="cb19-21"><a href="#cb19-21" tabindex="-1"></a><span class="co">#> name homeworld mass standard_mass</span></span>
<span id="cb19-22"><a href="#cb19-22" tabindex="-1"></a><span class="co">#> <chr> <chr> <dbl> <dbl></span></span>
<span id="cb19-23"><a href="#cb19-23" tabindex="-1"></a><span class="co">#> 1 Luke Skywalker Tatooine 77 -8.38</span></span>
<span id="cb19-24"><a href="#cb19-24" tabindex="-1"></a><span class="co">#> 2 C-3PO Tatooine 75 -10.4 </span></span>
<span id="cb19-25"><a href="#cb19-25" tabindex="-1"></a><span class="co">#> 3 R2-D2 Naboo 32 -32.2 </span></span>
<span id="cb19-26"><a href="#cb19-26" tabindex="-1"></a><span class="co">#> 4 Darth Vader Tatooine 136 50.6 </span></span>
<span id="cb19-27"><a href="#cb19-27" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span></code></pre></div>
<p>Or with window functions like <code>min_rank()</code>:</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a><span class="co"># Overall rank</span></span>
<span id="cb20-2"><a href="#cb20-2" tabindex="-1"></a>starwars <span class="sc">%>%</span> </span>
<span id="cb20-3"><a href="#cb20-3" tabindex="-1"></a> <span class="fu">select</span>(name, homeworld, height) <span class="sc">%>%</span> </span>
<span id="cb20-4"><a href="#cb20-4" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">rank =</span> <span class="fu">min_rank</span>(height))</span>
<span id="cb20-5"><a href="#cb20-5" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 4</span></span>
<span id="cb20-6"><a href="#cb20-6" tabindex="-1"></a><span class="co">#> name homeworld height rank</span></span>
<span id="cb20-7"><a href="#cb20-7" tabindex="-1"></a><span class="co">#> <chr> <chr> <int> <int></span></span>
<span id="cb20-8"><a href="#cb20-8" tabindex="-1"></a><span class="co">#> 1 Luke Skywalker Tatooine 172 29</span></span>
<span id="cb20-9"><a href="#cb20-9" tabindex="-1"></a><span class="co">#> 2 C-3PO Tatooine 167 21</span></span>
<span id="cb20-10"><a href="#cb20-10" tabindex="-1"></a><span class="co">#> 3 R2-D2 Naboo 96 5</span></span>
<span id="cb20-11"><a href="#cb20-11" tabindex="-1"></a><span class="co">#> 4 Darth Vader Tatooine 202 72</span></span>
<span id="cb20-12"><a href="#cb20-12" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span>
<span id="cb20-13"><a href="#cb20-13" tabindex="-1"></a></span>
<span id="cb20-14"><a href="#cb20-14" tabindex="-1"></a><span class="co"># Rank per homeworld</span></span>
<span id="cb20-15"><a href="#cb20-15" tabindex="-1"></a>starwars <span class="sc">%>%</span> </span>
<span id="cb20-16"><a href="#cb20-16" tabindex="-1"></a> <span class="fu">select</span>(name, homeworld, height) <span class="sc">%>%</span> </span>
<span id="cb20-17"><a href="#cb20-17" tabindex="-1"></a> <span class="fu">group_by</span>(homeworld) <span class="sc">%>%</span> </span>
<span id="cb20-18"><a href="#cb20-18" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">rank =</span> <span class="fu">min_rank</span>(height))</span>
<span id="cb20-19"><a href="#cb20-19" tabindex="-1"></a><span class="co">#> # A tibble: 87 × 4</span></span>
<span id="cb20-20"><a href="#cb20-20" tabindex="-1"></a><span class="co">#> # Groups: homeworld [49]</span></span>
<span id="cb20-21"><a href="#cb20-21" tabindex="-1"></a><span class="co">#> name homeworld height rank</span></span>
<span id="cb20-22"><a href="#cb20-22" tabindex="-1"></a><span class="co">#> <chr> <chr> <int> <int></span></span>
<span id="cb20-23"><a href="#cb20-23" tabindex="-1"></a><span class="co">#> 1 Luke Skywalker Tatooine 172 5</span></span>
<span id="cb20-24"><a href="#cb20-24" tabindex="-1"></a><span class="co">#> 2 C-3PO Tatooine 167 4</span></span>
<span id="cb20-25"><a href="#cb20-25" tabindex="-1"></a><span class="co">#> 3 R2-D2 Naboo 96 1</span></span>
<span id="cb20-26"><a href="#cb20-26" tabindex="-1"></a><span class="co">#> 4 Darth Vader Tatooine 202 10</span></span>
<span id="cb20-27"><a href="#cb20-27" tabindex="-1"></a><span class="co">#> # ℹ 83 more rows</span></span></code></pre></div>
</div>
<div id="filter" class="section level3">
<h3><code>filter()</code></h3>
<p>A grouped <code>filter()</code> effectively does a
<code>mutate()</code> to generate a logical v