skimr
Version:
CLI EDA for CSVs
955 lines (926 loc) • 178 kB
HTML
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Motivations for cpp11</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function () {
  // First child of every pandoc section wrapper (div.section with a "level" class).
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var headingTag = /^h[1-6]$/i;
  for (var idx = 0; idx < firstChildren.length; idx++) {
    var node = firstChildren[idx];
    // Only h1-h6 elements are touched; any other first child is left as is.
    if (headingTag.test(node.tagName)) {
      // Keep removing the first attribute until the element has none left.
      var attrs = node.attributes;
      while (attrs.length > 0) node.removeAttribute(attrs[0].name);
    }
  }
});
</script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Per-token colours for highlighted code. The two-letter classes are assumed to be
   pandoc's standard highlighting token abbreviations; the expansions noted on each
   line follow pandoc's default template (TODO confirm against the pandoc version
   that generated this file). Later `code > span.*` rules in this document set
   different values for some of these classes when the span is a direct child of
   <code>. */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension (no styling set here) */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// Apply pandoc's div.sourceCode style to pre.sourceCode instead.
// Scans the stylesheets whose owner element carries data-origin="pandoc" and, for
// each top-level `div.sourceCode` style rule that sets color or background-color,
// swaps it for a `pre.sourceCode` rule with the same declarations at the same index.
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    var sheet = sheets[i];
    // ownerNode is null for sheets pulled in through @import, and a non-element
    // owner has no dataset; skip those instead of throwing a TypeError.
    var owner = sheet.ownerNode;
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    var rules;
    // Reading cssRules may throw; such sheets are skipped.
    try { rules = sheet.cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // only a style rule whose selector is exactly div.sourceCode qualifies
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // nothing to move unless color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule in the same slot; the next
      // pass sees the new selector at index j and advances past it
      sheet.deleteRule(j);
      sheet.insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: a single 1px line along the top edge, no other borders. */
hr {
  /* The `border` shorthand resets style, width and colour on all four sides,
     so no separate border-style declaration is needed before it. */
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white backing, thin rounded frame, small horizontal gutter. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  /* Only one border declaration: a later duplicate always wins the cascade,
     so #CCCCCC is the colour that was actually being applied. */
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Motivations for cpp11</h1>
<div id="motivations" class="section level1">
<h1>Motivations</h1>
<p>R and S have a long history of interacting with compiled languages.
In fact the original version of S written in the late 1970s was mainly a
wrapper around FORTRAN routines. <a href="https://www.r-project.org/conferences/useR-2006/Slides/Chambers.pdf">(History-of-S)</a>
Released in 2000, the <a href="https://cran.r-project.org/package=cxx">cxx</a> package was an
early prototype of C++ bindings to R. <a href="https://cran.r-project.org/package=Rcpp">Rcpp</a> was first
published to CRAN in 2008, and <a href="https://cran.r-project.org/package=Rcpp11">Rcpp11</a> in 2014. Of
these <code>Rcpp</code> has by far the widest adoption, with over 2000
reverse dependencies as of 2020.</p>
<p>Rcpp has been a widely successful project, however over the years a
number of issues and additional C++ features have arisen. Adding these
features to Rcpp would require a great deal of work, or in some cases
would be impossible without severely breaking backwards
compatibility.</p>
<p>cpp11 is a ground up rewrite of C++ bindings to R with different
design trade-offs and features.</p>
<p>Changes that motivated cpp11 include:</p>
<ul>
<li>Enforcing <a href="#copy-on-write-semantics">copy-on-write
semantics</a>.</li>
<li>Improving the <a href="#improve-safety">safety</a> of using the R
API from C++ code.</li>
<li>Supporting <a href="#altrep-support">ALTREP objects</a>.</li>
<li>Using <a href="#utf-8-everywhere">UTF-8 strings</a> everywhere.</li>
<li>Applying newer <a href="#c11-features">C++11 features</a>.</li>
<li>Having a more straightforward, <a href="#simpler-implementation">simpler implementation</a>.</li>
<li>Faster <a href="#compilation-speed">compilation time</a> with lower
memory requirements.</li>
<li>Being <em>completely</em> <a href="#header-only">header only</a> to
avoid ABI issues.</li>
<li>Capable of <a href="#vendoring">vendoring</a> if desired.</li>
<li>More robust <a href="#protection">protection</a> using a much more
efficient linked list data structure.</li>
<li><a href="#growing-vectors">Growing vectors</a> more
efficiently.</li>
</ul>
<div id="copy-on-write-semantics" class="section level2">
<h2>Copy-on-write semantics</h2>
<p>R uses <a href="https://adv-r.hadley.nz/names-values.html#copy-on-modify">copy-on-write</a>
(also called copy-on-modify) semantics. Let’s say you have two variables
<code>x</code> and <code>y</code> that both point to the same underlying
data.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>)</span>
<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a>y <span class="ot"><-</span> x</span></code></pre></div>
<p>If you modify <code>y</code>, R will first copy the values of
<code>x</code> to a new position, then point <code>y</code> to the new
location and only after the copy modify <code>y</code>. This allows
<code>x</code> to retain the original values.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>y[[<span class="dv">3</span>]] <span class="ot"><-</span> <span class="dv">4</span></span>
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a>y</span>
<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a><span class="co">#> [1] 1 2 4</span></span>
<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a></span>
<span id="cb2-5"><a href="#cb2-5" tabindex="-1"></a>x</span>
<span id="cb2-6"><a href="#cb2-6" tabindex="-1"></a><span class="co">#> [1] 1 2 3</span></span></code></pre></div>
<p>C++ does not have copy-on-write built into the language, however it
has related concepts, copy-by-value and copy-by-reference. Copy-by-value
works similarly to R, except that R only copies when something is
changed, whereas C++ <em>always</em> copies.</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a><span class="dt">int</span> x <span class="op">=</span> <span class="dv">42</span><span class="op">;</span></span>
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a><span class="dt">int</span> y <span class="op">=</span> x<span class="op">;</span></span>
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a>y <span class="op">=</span> <span class="dv">0</span><span class="op">;</span></span>
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="co">// x is still == 42</span></span></code></pre></div>
<p>Copy-by-reference does the opposite, both <code>x</code> and
<code>y</code> always point to the <em>same</em> underlying value. In
C++ you specify a reference with <code>&</code>.</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span class="dt">int</span> x <span class="op">=</span> <span class="dv">42</span><span class="op">;</span></span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a><span class="dt">int</span> <span class="op">&</span>y <span class="op">=</span> x<span class="op">;</span></span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a>y <span class="op">=</span> <span class="dv">0</span><span class="op">;</span></span>
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a><span class="co">// both x and y are now 0</span></span></code></pre></div>
<p>Copy-by-reference is a valuable technique, as it avoids the overhead
of copying the data. However it can also lead to errors when internal
functions change their inputs unexpectedly. Rcpp uses copy-by-reference
by default (even if you pass an Rcpp vector class by value). This gives
Rcpp functions completely different semantics from normal R
functions.</p>
<p>We can illustrate this by creating an Rcpp function that multiplies its
input vector by 2.</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="pp">#include </span><span class="im">"Rcpp.h"</span></span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a><span class="kw">using</span> <span class="kw">namespace</span> Rcpp<span class="op">;</span></span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a><span class="co">// [[Rcpp::export]]</span></span>
<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a>NumericVector times_two_rcpp<span class="op">(</span>NumericVector x<span class="op">)</span> <span class="op">{</span></span>
<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a> <span class="cf">for</span> <span class="op">(</span><span class="dt">int</span> i <span class="op">=</span> <span class="dv">0</span><span class="op">;</span> i <span class="op"><</span> x<span class="op">.</span>size<span class="op">();</span> <span class="op">++</span>i<span class="op">)</span> <span class="op">{</span></span>
<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a> x<span class="op">[</span>i<span class="op">]</span> <span class="op">=</span> x<span class="op">[</span>i<span class="op">]</span> <span class="op">*</span> <span class="dv">2</span><span class="op">;</span></span>
<span id="cb5-8"><a href="#cb5-8" tabindex="-1"></a> <span class="op">}</span></span>
<span id="cb5-9"><a href="#cb5-9" tabindex="-1"></a> <span class="cf">return</span> x<span class="op">;</span></span>
<span id="cb5-10"><a href="#cb5-10" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<p>If you do this with regular R functions, you will see the value of
<code>y</code> is <code>x</code> * 2, but the value of <code>x</code> is
unchanged.</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>)</span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a>y <span class="ot"><-</span> x <span class="sc">*</span> <span class="dv">2</span></span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a>y</span>
<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a><span class="co">#> [1] 2 4 6</span></span>
<span id="cb6-5"><a href="#cb6-5" tabindex="-1"></a></span>
<span id="cb6-6"><a href="#cb6-6" tabindex="-1"></a>x</span>
<span id="cb6-7"><a href="#cb6-7" tabindex="-1"></a><span class="co">#> [1] 1 2 3</span></span></code></pre></div>
<p>However if we now call our <code>times_two_rcpp()</code> function we
get the right output value, but now <code>x</code> is <em>also
changed</em>.</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>z <span class="ot"><-</span> <span class="fu">times_two_rcpp</span>(x)</span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a>z</span>
<span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a><span class="co">#> [1] 2 4 6</span></span>
<span id="cb7-4"><a href="#cb7-4" tabindex="-1"></a></span>
<span id="cb7-5"><a href="#cb7-5" tabindex="-1"></a>x</span>
<span id="cb7-6"><a href="#cb7-6" tabindex="-1"></a><span class="co">#> [1] 2 4 6</span></span></code></pre></div>
<p>cpp11 strives to make its functions behave similarly to normal R
functions, while preserving the speed of Rcpp when read only access is
needed. Each of the r_vector classes in cpp11 has a normal <em>read
only</em> version that uses copy-by-reference, and a <em>writable</em>
version which uses copy-by-value.</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a><span class="pp">#include </span><span class="im">"cpp11/doubles.hpp"</span></span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a></span>
<span id="cb8-3"><a href="#cb8-3" tabindex="-1"></a><span class="op">[[</span><span class="at">cpp11</span><span class="op">::</span><span class="at">register</span><span class="op">]]</span></span>
<span id="cb8-4"><a href="#cb8-4" tabindex="-1"></a>cpp11<span class="op">::</span>doubles times_two_cpp11<span class="op">(</span>cpp11<span class="op">::</span>writable<span class="op">::</span>doubles x<span class="op">)</span> <span class="op">{</span></span>
<span id="cb8-5"><a href="#cb8-5" tabindex="-1"></a> <span class="cf">for</span> <span class="op">(</span><span class="dt">int</span> i <span class="op">=</span> <span class="dv">0</span><span class="op">;</span> i <span class="op"><</span> x<span class="op">.</span>size<span class="op">();</span> <span class="op">++</span>i<span class="op">)</span> <span class="op">{</span></span>
<span id="cb8-6"><a href="#cb8-6" tabindex="-1"></a> x<span class="op">[</span>i<span class="op">]</span> <span class="op">=</span> x<span class="op">[</span>i<span class="op">]</span> <span class="op">*</span> <span class="dv">2</span><span class="op">;</span></span>
<span id="cb8-7"><a href="#cb8-7" tabindex="-1"></a> <span class="op">}</span></span>
<span id="cb8-8"><a href="#cb8-8" tabindex="-1"></a> <span class="cf">return</span> x<span class="op">;</span></span>
<span id="cb8-9"><a href="#cb8-9" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<p>Using <code>cpp11::writable::doubles</code> first <em>copies</em> the
input vector, so when we do the multiplication we do not modify the
original data.</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>)</span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a></span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a>z <span class="ot"><-</span> <span class="fu">times_two_cpp11</span>(x)</span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a>z</span>
<span id="cb9-5"><a href="#cb9-5" tabindex="-1"></a><span class="co">#> [1] 2 4 6</span></span>
<span id="cb9-6"><a href="#cb9-6" tabindex="-1"></a></span>
<span id="cb9-7"><a href="#cb9-7" tabindex="-1"></a>x</span>
<span id="cb9-8"><a href="#cb9-8" tabindex="-1"></a><span class="co">#> [1] 1 2 3</span></span></code></pre></div>
</div>
<div id="improve-safety" class="section level2">
<h2>Improve safety</h2>
<p>Internally R is written in C, not C++. In general C and C++ work well
together, a large part of C++’s success is due to its high
interoperability with C code. However one area in which C and C++ are
generally <em>not</em> interoperable is error handling. In C++ the most
common way to handle errors is with <a href="https://isocpp.org/wiki/faq/exceptions">exceptions</a>.</p>
<p>Exceptions provide a clean, safe way for objects to obtain and
cleanup resources automatically even when errors occur.</p>
<div id="c-safety" class="section level3">
<h3>C safety</h3>
<p>The C language does not have support for exceptions, so error
handling is done in a variety of ways. These include error codes like <a href="https://en.cppreference.com/w/c/error/errno">errno</a>,
conditional statements, and in the R codebase the <a href="https://cplusplus.com/reference/csetjmp/longjmp/">longjmp</a>
function.</p>
<p><code>longjmp</code>, which stands for ‘long jump’ is a function that
allows you to transfer the control flow of a program to another location
elsewhere in the program. R uses long jumps extensively in its error
handling routines. If an R function is executing and an error occurs, a
long jump is called which ‘jumps’ the control flow into the error
handling code.</p>
<p>Crucially long jumps are <em>incompatible</em> with C++ <a href="https://isocpp.org/wiki/faq/dtors">destructors</a>. If a long jump
occurs the destructors of any active C++ objects are not run, and
therefore any resources (such as memory, file handles, etc.) managed by
those objects will cause a <a href="https://en.wikipedia.org/wiki/Resource_leak">resource
leak</a>.</p>
<p>For example, the following unsafe code would leak the memory
allocated in the C++ <code>std::vector</code> <code>x</code> when the R
API function <code>Rf_allocVector()</code> fails (since you can’t create
a vector of <code>-1</code> size).</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a><span class="bu">std::</span>vector<span class="op"><</span><span class="dt">double</span><span class="op">></span> x<span class="op">({</span><span class="fl">1.</span><span class="op">,</span> <span class="fl">2.</span><span class="op">,</span> <span class="fl">3.</span><span class="op">});</span></span>
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a></span>
<span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a>SEXP y <span class="op">=</span> PROTECT<span class="op">(</span>Rf_allocVector<span class="op">(</span>REALSXP<span class="op">,</span> <span class="op">-</span><span class="dv">1</span><span class="op">));</span></span></code></pre></div>
<p>cpp11 provides two mechanisms to make interfacing with R’s C API and
C++ code safer. <code>cpp11::unwind_protect()</code> takes a functional
object (a C++11 lambda function or <code>std::function</code>) and
converts any C long jumps encountered to C++ exceptions. Now instead of
a C long jump happening when the <code>Rf_allocVector()</code> call
fails, a C++ exception occurs, which <em>does</em> trigger the
<code>std::vector</code> destructor, so that memory is automatically
released.</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="bu">std::</span>vector<span class="op"><</span><span class="dt">double</span><span class="op">></span> x<span class="op">({</span><span class="fl">1.</span><span class="op">,</span> <span class="fl">2.</span><span class="op">,</span> <span class="fl">3.</span><span class="op">});</span></span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a></span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>SEXP y<span class="op">;</span></span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a>unwind_protect<span class="op">([&amp;]()</span> <span class="op">{</span></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a> y <span class="op">=</span> Rf_allocVector<span class="op">(</span>REALSXP<span class="op">,</span> <span class="op">-</span><span class="dv">1</span><span class="op">);</span></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="op">});</span></span></code></pre></div>
<p><code>cpp11::safe()</code> is a more concise way to wrap a particular
R API function with <code>unwind_protect()</code>.</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="bu">std::</span>vector<span class="op"><</span><span class="dt">double</span><span class="op">></span> x<span class="op">({</span><span class="fl">1.</span><span class="op">,</span> <span class="fl">2.</span><span class="op">,</span> <span class="fl">3.</span><span class="op">});</span></span>
<span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a></span>
<span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a>SEXP y <span class="op">=</span> PROTECT<span class="op">(</span>safe<span class="op">[</span>Rf_allocVector<span class="op">](</span>REALSXP<span class="op">,</span> <span class="op">-</span><span class="dv">1</span><span class="op">));</span></span></code></pre></div>
<p>Again using <code>cpp11::safe()</code> converts the C long jump to a
C++ exception, so the memory is automatically released.</p>
<p>cpp11 uses these mechanisms extensively internally when calling the R
C API, which makes cpp11 much safer against resource leaks than using
Rcpp or calling R’s C API by hand.</p>
</div>
<div id="c-safety-1" class="section level3">
<h3>C++ safety</h3>
<p>In the inverse of C safety we also need to ensure that C++ exceptions
do not reach the C call stack, as they will terminate R if that occurs.
Like Rcpp, cpp11 automatically generates <code>try / catch</code> guards
around registered functions to prevent this and also converts C++
exceptions into normal R errors. This is done without developer facing
code changes.</p>
<p>With both C and C++ sides of the coin covered we can safely use R’s C
API and C++ code together with C++ objects without leaking
resources.</p>
</div>
</div>
<div id="altrep-support" class="section level2">
<h2>Altrep support</h2>
<p><a href="https://svn.r-project.org/R/branches/ALTREP/ALTREP.html">ALTREP</a>
which stands for <strong>ALT</strong>ernative
<strong>REP</strong>resentations is a feature introduced in R 3.5. ALTREP
allows R internals and package authors to define alternative ways of
representing data to R. One example of the use of altrep is the
<code>:</code> operator.</p>
<p>Prior to R 3.5 <code>:</code> generated a full vector for the entire
sequence. e.g. <code>1:1000</code> would require 1000 individual values.
As of R 3.5 this sequence is instead represented by an ALTREP vector, so
<em>none</em> of the values actually exist in memory. Instead each time
R accesses a particular value in the sequence, that value is computed
on the fly. This saves memory and execution time, and allows users to use
sequences which would otherwise be too big to fit in memory.</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="dv">1</span><span class="sc">:</span><span class="fl">1e9</span></span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a><span class="co">#> [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20</span></span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a><span class="co">#> [ reached getOption("max.print") -- omitted 999999980 entries ]</span></span></code></pre></div>
<p>Because Rcpp predates the introduction of ALTREP, it does not support
the interfaces needed to access ALTREP objects. This means the objects
must be converted to normal R objects as soon as they are used by
Rcpp.</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a><span class="pp">#include </span><span class="im">"Rcpp.h"</span></span>
<span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a></span>
<span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a><span class="co">// [[Rcpp::export]]</span></span>
<span id="cb14-4"><a href="#cb14-4" tabindex="-1"></a>Rcpp<span class="op">::</span>IntegerVector identity_rcpp<span class="op">(</span>Rcpp<span class="op">::</span>IntegerVector x<span class="op">)</span> <span class="op">{</span></span>
<span id="cb14-5"><a href="#cb14-5" tabindex="-1"></a> <span class="cf">return</span> x<span class="op">;</span></span>
<span id="cb14-6"><a href="#cb14-6" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">identity_rcpp</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">100000</span>)</span>
<span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a>lobstr<span class="sc">::</span><span class="fu">obj_size</span>(x)</span>
<span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a><span class="co">#> 400.73 kB</span></span></code></pre></div>
<p>Whereas cpp11 objects preserve the ALTREP object.</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a><span class="pp">#include </span><span class="im">"cpp11/integers.hpp"</span></span>
<span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a></span>
<span id="cb16-3"><a href="#cb16-3" tabindex="-1"></a><span class="op">[[</span><span class="at">cpp11</span><span class="op">::</span><span class="at">register</span><span class="op">]]</span></span>
<span id="cb16-4"><a href="#cb16-4" tabindex="-1"></a>cpp11<span class="op">::</span>integers identity_cpp11<span class="op">(</span>cpp11<span class="op">::</span>integers x<span class="op">)</span> <span class="op">{</span></span>
<span id="cb16-5"><a href="#cb16-5" tabindex="-1"></a> <span class="cf">return</span> x<span class="op">;</span></span>
<span id="cb16-6"><a href="#cb16-6" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a>y <span class="ot"><-</span> <span class="fu">identity_cpp11</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">100000</span>)</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a>lobstr<span class="sc">::</span><span class="fu">obj_size</span>(y)</span>
<span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a><span class="co">#> 680 B</span></span></code></pre></div>
<div id="altrep-benchmarks" class="section level3">
<h3>Altrep benchmarks</h3>
<p>In these benchmarks note that Rcpp allocates memory for the ALTREP
vectors. This is because Rcpp implicitly converts them into normal R
vectors. cpp11 retains them as ALTREP vectors, so no additional memory
is needed.</p>
<p><code>foreach</code> and <code>accumulate</code> both use iterators
that take advantage of <code>REAL_GET_REGION</code> to buffer queries.
This makes them faster than naive C-style for loops with ALTREP
vectors.</p>
<p>The for2 case shows an optimization you can use if you know at
compile-time that you won’t be dealing with ALTREP vectors. By
specifying <code>false</code> to the second argument
(<code>is_altrep</code>), you can disable the ALTREP support. This
causes the ALTREP conditional code to be compiled out resulting in loop
unrolling (and speeds) identical to that generated by Rcpp.</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a><span class="fu">library</span>(cpp11test)</span>
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a></span>
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a>cases <span class="ot"><-</span> <span class="fu">expand.grid</span>(</span>
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a> <span class="at">len =</span> <span class="fl">3e6</span>,</span>
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a> <span class="at">vector =</span> <span class="fu">c</span>(<span class="st">"normal"</span>, <span class="st">"altrep"</span>),</span>
<span id="cb18-6"><a href="#cb18-6" tabindex="-1"></a> <span class="at">method =</span> <span class="fu">c</span>(<span class="st">"for"</span>, <span class="st">"foreach"</span>, <span class="st">"accumulate"</span>),</span>
<span id="cb18-7"><a href="#cb18-7" tabindex="-1"></a> <span class="at">pkg =</span> <span class="fu">c</span>(<span class="st">"cpp11"</span>, <span class="st">"rcpp"</span>),</span>
<span id="cb18-8"><a href="#cb18-8" tabindex="-1"></a> <span class="at">stringsAsFactors =</span> <span class="cn">FALSE</span></span>
<span id="cb18-9"><a href="#cb18-9" tabindex="-1"></a>)</span>
<span id="cb18-10"><a href="#cb18-10" tabindex="-1"></a></span>
<span id="cb18-11"><a href="#cb18-11" tabindex="-1"></a><span class="co"># Add special case</span></span>
<span id="cb18-12"><a href="#cb18-12" tabindex="-1"></a>cases <span class="ot"><-</span> <span class="fu">rbind</span>(<span class="fu">list</span>(<span class="at">len =</span> <span class="fl">3e6</span>, <span class="at">vector =</span> <span class="st">"normal"</span>, <span class="at">method =</span> <span class="st">"for2"</span>, <span class="at">pkg =</span> <span class="st">"cpp11"</span>), cases)</span>
<span id="cb18-13"><a href="#cb18-13" tabindex="-1"></a></span>
<span id="cb18-14"><a href="#cb18-14" tabindex="-1"></a>b_sum <span class="ot"><-</span> bench<span class="sc">::</span><span class="fu">press</span>(</span>
<span id="cb18-15"><a href="#cb18-15" tabindex="-1"></a> <span class="at">.grid =</span> cases,</span>
<span id="cb18-16"><a href="#cb18-16" tabindex="-1"></a> {</span>
<span id="cb18-17"><a href="#cb18-17" tabindex="-1"></a> seq_real <span class="ot"><-</span> <span class="cf">function</span>(x) <span class="fu">as.numeric</span>(<span class="fu">seq_len</span>(x))</span>
<span id="cb18-18"><a href="#cb18-18" tabindex="-1"></a> funs <span class="ot"><-</span> <span class="fu">c</span>(<span class="st">"normal"</span> <span class="ot">=</span> rnorm, <span class="st">"altrep"</span> <span class="ot">=</span> seq_real)</span>
<span id="cb18-19"><a href="#cb18-19" tabindex="-1"></a> x <span class="ot"><-</span> funs[[vector]](len)</span>
<span id="cb18-20"><a href="#cb18-20" tabindex="-1"></a> fun <span class="ot"><-</span> <span class="fu">match.fun</span>(<span class="fu">sprintf</span>(<span class="st">"%ssum_dbl_%s_"</span>, <span class="fu">ifelse</span>(pkg <span class="sc">==</span> <span class="st">"cpp11"</span>, <span class="st">""</span>, <span class="fu">paste0</span>(pkg, <span class="st">"_"</span>)), method))</span>
<span id="cb18-21"><a href="#cb18-21" tabindex="-1"></a> bench<span class="sc">::</span><span class="fu">mark</span>(</span>
<span id="cb18-22"><a href="#cb18-22" tabindex="-1"></a> <span class="fu">fun</span>(x)</span>
<span id="cb18-23"><a href="#cb18-23" tabindex="-1"></a> )</span>
<span id="cb18-24"><a href="#cb18-24" tabindex="-1"></a> }</span>
<span id="cb18-25"><a href="#cb18-25" tabindex="-1"></a>)[<span class="fu">c</span>(<span class="st">"pkg"</span>, <span class="st">"method"</span>, <span class="st">"vector"</span>, <span class="st">"min"</span>, <span class="st">"median"</span>, <span class="st">"mem_alloc"</span>, <span class="st">"itr/sec"</span>, <span class="st">"n_gc"</span>)]</span>
<span id="cb18-26"><a href="#cb18-26" tabindex="-1"></a></span>
<span id="cb18-27"><a href="#cb18-27" tabindex="-1"></a><span class="fu">saveRDS</span>(b_sum, <span class="st">"sum.Rds"</span>, <span class="at">version =</span> <span class="dv">2</span>)</span></code></pre></div>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a>knitr<span class="sc">::</span><span class="fu">kable</span>(<span class="fu">readRDS</span>(<span class="st">"sum.Rds"</span>))</span></code></pre></div>
<table>
<thead>
<tr class="header">
<th align="left">pkg</th>
<th align="left">method</th>
<th align="left">vector</th>
<th align="right">min</th>
<th align="right">median</th>
<th align="right">mem_alloc</th>
<th align="right">itr/sec</th>
<th align="right">n_gc</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left">cpp11</td>
<td align="left">for2</td>
<td align="left">normal</td>
<td align="right">3.01ms</td>
<td align="right">3.21ms</td>
<td align="right">0B</td>
<td align="right">302.9364</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">cpp11</td>
<td align="left">for</td>
<td align="left">normal</td>
<td align="right">2.93ms</td>
<td align="right">3.09ms</td>
<td align="right">0B</td>
<td align="right">319.9100</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">cpp11</td>
<td align="left">for</td>
<td align="left">altrep</td>
<td align="right">8.09ms</td>
<td align="right">8.44ms</td>
<td align="right">0B</td>
<td align="right">117.0562</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">cpp11</td>
<td align="left">foreach</td>
<td align="left">normal</td>
<td align="right">2.97ms</td>
<td align="right">3.36ms</td>
<td align="right">0B</td>
<td align="right">292.8306</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">cpp11</td>
<td align="left">foreach</td>
<td align="left">altrep</td>
<td align="right">4.02ms</td>
<td align="right">4.18ms</td>
<td align="right">0B</td>
<td align="right">236.2339</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">cpp11</td>
<td align="left">accumulate</td>
<td align="left">normal</td>
<td align="right">3.03ms</td>
<td align="right">3.24ms</td>
<td align="right">0B</td>
<td align="right">303.3408</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">cpp11</td>
<td align="left">accumulate</td>
<td align="left">altrep</td>
<td align="right">4.07ms</td>
<td align="right">4.31ms</td>
<td align="right">0B</td>
<td align="right">225.8066</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">rcpp</td>
<td align="left">for</td>
<td align="left">normal</td>
<td align="right">2.81ms</td>
<td align="right">3.13ms</td>
<td align="right">0B</td>
<td align="right">311.3724</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">rcpp</td>
<td align="left">for</td>
<td align="left">altrep</td>
<td align="right">2.81ms</td>
<td align="right">3.13ms</td>
<td align="right">22.9MB</td>
<td align="right">311.6365</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">rcpp</td>
<td align="left">foreach</td>
<td align="left">normal</td>
<td align="right">2.93ms</td>
<td align="right">3.46ms</td>
<td align="right">0B</td>
<td align="right">293.9831</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">rcpp</td>
<td align="left">foreach</td>
<td align="left">altrep</td>
<td align="right">2.81ms</td>
<td align="right">3.07ms</td>
<td align="right">22.9MB</td>
<td align="right">313.6250</td>
<td align="right">0</td>
</tr>
<tr class="even">
<td align="left">rcpp</td>
<td align="left">accumulate</td>
<td align="left">normal</td>
<td align="right">2.8ms</td>
<td align="right">3.01ms</td>
<td align="right">0B</td>
<td align="right">321.6647</td>
<td align="right">0</td>
</tr>
<tr class="odd">
<td align="left">rcpp</td>
<td align="left">accumulate</td>
<td align="left">altrep</td>
<td align="right">2.75ms</td>
<td align="right">3ms</td>
<td align="right">22.9MB</td>
<td align="right">322.9292</td>
<td align="right">0</td>
</tr>
</tbody>
</table>
<p><a href="https://github.com/r-lib/cpp11/blob/main/cpp11test/src/sum.cpp">cpp11test/src/sum.cpp</a>
contains the code run in these benchmarks.</p>
</div>
</div>
<div id="utf-8-everywhere" class="section level2">
<h2>UTF-8 everywhere</h2>
<p>R has complicated support for Unicode strings and non-ASCII code
pages, whose behavior often differs substantially on different operating
systems, particularly Windows. Correctly dealing with this is
challenging and often feels like whack-a-mole.</p>
<p>To combat this complexity cpp11 uses the <a href="http://utf8everywhere.org/">UTF-8 everywhere</a> philosophy. This
means that whenever text data is converted from R data structures to C++
data structures by cpp11 the data is translated into UTF-8. Conversely
any text data coming from C++ code is assumed to be UTF-8 and marked as
such for R. Doing this universally avoids many locale specific issues
when dealing with Unicode text.</p>
<p>Concretely cpp11 always uses <code>Rf_translateCharUTF8()</code> when
obtaining <code>const char*</code> from <code>CHARSXP</code> objects and
uses <code>Rf_mkCharCE(, CE_UTF8)</code> when creating new
<code>CHARSXP</code> objects from <code>const char*</code> inputs.</p>
<!--TODO: unicode examples?-->
</div>
<div id="c11-features" class="section level2">
<h2>C++11 features</h2>
<p>C++11 provides a host of new features to the C++ language. cpp11 uses
a number of these including</p>
<ul>
<li><a href="https://en.cppreference.com/w/cpp/language/move_constructor">move
semantics</a></li>
<li><a href="https://en.cppreference.com/w/cpp/header/type_traits">type
traits</a></li>
<li><a href="https://en.cppreference.com/w/cpp/utility/initializer_list">initializer_list</a></li>
<li><a href="https://en.cppreference.com/w/cpp/language/parameter_pack">variadic
templates / parameter packs</a></li>
<li><a href="https://en.cppreference.com/w/cpp/language/user_literal">user
defined literals</a></li>
<li><a href="https://en.cppreference.com/w/cpp/language/attributes">user
defined attributes</a></li>
</ul>
</div>
<div id="simpler-implementation" class="section level2">
<h2>Simpler implementation</h2>
<p>Rcpp is very ambitious, with a number of advanced features, including
<a href="https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-modules.pdf">modules</a>,
<a href="https://cran.r-project.org/package=Rcpp/vignettes/Rcpp-sugar.pdf">sugar</a>
and extensive support for <a href="https://CRAN.R-project.org/package=Rcpp/vignettes/Rcpp-attributes.pdf">attributes</a>.
While these are useful features, many R packages do not use one or any
of these advanced features. In addition the code needed to support these
features is complex and can be challenging to maintain.</p>
<p>cpp11 takes a more limited scope, providing only the set of r_vector
wrappers for R vector types, coercion methods to and from C++ and the
limited attributes necessary to support use in R packages.</p>
<p>This limited scope allows the implementation to be much simpler: the
headers in Rcpp 1.0.4 have 74,658 lines of code (excluding blank or
commented lines) in 379 files. Some headers in Rcpp are automatically
generated, removing these still gives you 25,249 lines of code in 357
files. In contrast the headers in cpp11 contain only 1,734 lines of code
in 19 files.</p>
<p>This reduction in complexity should make cpp11 an easier project to
maintain and ensure correctness, particularly around interactions with
the R garbage collector.</p>
<!--TODO: mention rchk compatibility here?-->
</div>
<div id="compilation-speed" class="section level2">
<h2>Compilation speed</h2>
<p>Rcpp always bundles all of its headers together, which causes slow
compilation times and high peak memory usage when compiling. The headers
in cpp11 are more easily decoupled, so you can include only the
particular headers you actually use in a source file. This can
significantly improve the compilation speed and memory usage to compile
your package.</p>
<p>Here are some real examples of the reduction in compile time and peak
memory usage after converting packages to cpp11.</p>
<table style="width:100%;">
<colgroup>
<col width="14%" />
<col width="14%" />
<col width="14%" />
<col width="14%" />
<col width="14%" />
<col width="14%" />
<col width="14%" />
</colgroup>
<thead>
<tr class="header">
<th>package</th>
<th>Rcpp compile time</th>
<th>cpp11 compile time</th>
<th>Rcpp peak memory</th>
<th>cpp11 peak memory</th>
<th>Rcpp commit</th>
<th>cpp11 commit</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>haven</td>
<td>17.42s</td>
<td>7.13s</td>
<td>428MB</td>
<td>204MB</td>
<td><a href="https://github.com/tidyverse/haven/compare/a3cf75a4...978cb034">a3cf75a4</a></td>
<td><a href="https://github.com/tidyverse/haven/compare/a3cf75a4...978cb034">978cb034</a></td>
</tr>
<tr class="even">
<td>readr</td>
<td>124.13s</td>
<td>81.08s</td>
<td>969MB</td>
<td>684MB</td>
<td><a href="https://github.com/tidyverse/readr/compare/ec0d8989...aa89ff72">ec0d8989</a></td>
<td><a href="https://github.com/tidyverse/readr/compare/ec0d8989...aa89ff72">aa89ff72</a></td>
</tr>
<tr class="odd">
<td>roxygen2</td>
<td>17.34s</td>
<td>4.24s</td>
<td>371MB</td>
<td>109MB</td>
<td><a href="https://github.com/r-lib/roxygen2/compare/6f081b75...e8e1e22d">6f081b75</a></td>
<td><a href="https://github.com/r-lib/roxygen2/compare/6f081b75...e8e1e22d">e8e1e22d</a></td>
</tr>
<tr class="even">
<td>tidyr</td>
<td>14.25s</td>
<td>3.34s</td>
<td>363MB</td>
<td>83MB</td>
<td><a href="https://github.com/tidyverse/tidyr/compare/3899ed51...60f7c7d4">3899ed51</a></td>
<td><a href="https://github.com/tidyverse/tidyr/compare/3899ed51...60f7c7d4">60f7c7d4</a></td>
</tr>
</tbody>
</table>
</div>
<div id="header-only" class="section level2">
<h2>Header only</h2>
<p>Rcpp has long been a <em>mostly</em> <a href="https://en.wikipedia.org/wiki/Header-only">header only</a>
library; however, it is not a <em>completely</em> header only library. There
have been <a href="https://github.com/tidyverse/dplyr/issues/2308">cases</a> when a
package was first installed with version X of Rcpp, and then a newer
version of Rcpp was later installed. Then when the original package
was loaded R would crash, because the <a href="https://en.wikipedia.org/wiki/Application_binary_interface">Application
Binary Interface</a> of Rcpp had changed between the two versions.</p>
<p>Because cpp11 consists of exclusively headers this issue does not
occur.</p>
</div>
<div id="vendoring" class="section level2">
<h2>Vendoring</h2>
<p>In the Go community the concept of <a href="https://go.googlesource.com/proposal/+/master/design/25719-go15vendor.md">vendoring</a>
is widespread. Vendoring means that you copy the code for the
dependencies into your project’s source tree. This ensures the
dependency code is fixed and stable until it is updated. Because cpp11
is fully <a href="#header-only">header only</a> you can vendor the code
in the same way. <code>cpp11::vendor_cpp11()</code> is provided to do
this if you choose.</p>
<p>Vendoring has advantages and drawbacks, however. The advantage is that
changes to the cpp11 project could never break your existing code. The
drawbacks are both minor (your package size is now slightly larger) and
major (you no longer get bugfixes and new features until you explicitly
update cpp11).</p>
<p>I think the majority of packages should use
<code>LinkingTo: cpp11</code> and <em>not</em> vendor the cpp11
dependency. However, vendoring can be appropriate for certain
situations.</p>
</div>
<div id="protection" class="section level2">
<h2>Protection</h2>
<p>cpp11 uses a custom doubly linked list data structure to track
objects it is managing. This structure is much more efficient for large
numbers of objects than using <code>R_PreserveObject()</code> /
<code>R_ReleaseObject()</code> as is done in Rcpp.</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a><span class="fu">library</span>(cpp11test)</span>
<span id="cb20-2"><a href="#cb20-2" tabindex="-1"></a>grid <span class="ot"><-</span> <span class="fu">expand.grid</span>(<span class="at">len =</span> <span class="fu">c</span>(<span class="dv">10</span> <span class="sc">^</span> (<span class="dv">2</span><span class="sc">:</span><span class="dv">5</span>), <span class="fl">2e5</span>), <span class="at">pkg =</span> <span class="fu">c</span>(<span class="st">"cpp11"</span>, <span class="st">"rcpp"</span>), <span class="at">stringsAsFactors =</span> <span class="cn">FALSE</span>)</span>
<span id="cb20-3"><a href="#cb20-3" tabindex="-1"></a>b_release <span class="ot"><-</span> bench<span class="sc">::</span><span class="fu">press</span>(<span class="at">.grid =</span> grid,</span>
<span id="cb20-4"><a href="#cb20-4" tabindex="-1"></a> {</span>
<span id="cb20-5"><a href="#cb20-5" tabindex="-1"></a> fun <span class="ot">=</span> <span class="fu">match.fun</span>(<span class="fu">sprintf</span>(<span class="st">"%s_release_"</span>, pkg))</span>
<span id="cb20-6"><a href="#cb20-6" tabindex="-1"></a> bench<span class="sc">::</span><span class="fu">mark</span>(</span>
<span id="cb2