skimr
Version:
CLI EDA for CSVs
855 lines (819 loc) • 47.1 kB
HTML
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Yihui Xie" />
<meta name="date" content="2023-11-01" />
<title>An Introduction to xfun</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  // First child of every section wrapper (div.section whose class contains "level").
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only heading elements (h1-h6) are stripped; any other first child is left alone.
    if (!/^h[1-6]$/i.test(node.tagName)) return;
    // Keep removing the first attribute until none remain on the heading.
    var attrs = node.attributes;
    while (attrs.length > 0) node.removeAttribute(attrs[0].name);
  });
});
</script>
<style type="text/css">
/* NOTE: the white-space value on `code` below is superseded by the later
   `code { white-space: pre; }` rule in the following style element. */
code{white-space: pre-wrap;}
/* Utility classes: small caps, underline, half-width inline column,
   hanging indent, and task lists without bullets. */
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// Apply pandoc's div.sourceCode style to pre.sourceCode instead.
// Scans the stylesheets whose owner element carries data-origin="pandoc" and,
// for every `div.sourceCode` rule that sets a color or background-color,
// swaps it for a `pre.sourceCode` rule with the same declarations at the
// same index.
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    var sheet = sheets[i];
    var owner = sheet.ownerNode;
    // A sheet without an owner node (or whose owner has no dataset) cannot be
    // the tagged pandoc sheet; skip it instead of throwing on the dereference.
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    var rules;
    // Reading cssRules can throw; such sheets are skipped.
    try { rules = sheet.cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule; j is deliberately not
      // advanced here: the inserted rule no longer matches "div.sourceCode",
      // so the next pass steps over it.
      sheet.deleteRule(j);
      sheet.insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 800px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 20px;
}
#header {
text-align: center;
}
#TOC {
clear: both;
margin: 0 0 10px 0;
padding: 4px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table:not([class]) {
margin: auto;
min-width: 40%;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table[summary="R argblock"] {
width: 100%;
border: none;
}
table:not([class]) th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table:not([class]) td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table:not([class]), table:not([class]) th, table:not([class]) td {
border-left-style: none;
border-right-style: none;
}
table:not([class]) tr.odd {
background-color: #f7f7f7;
}
/* NOTE: this margin is superseded by the later `p { margin: 0 0 10px; }`
   rule further down in this stylesheet. */
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 13px;
padding-bottom: 1px;
}
/* Horizontal rule: all borders cleared, then a single 1px top border drawn.
   (A preceding `border-style: solid` was dropped: the `border` shorthand
   reset it immediately, so it never took effect.) */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f5f5f5;
border-radius: 3px;
color: #333;
}
pre {
overflow-x: auto;
border-radius: 3px;
margin: 5px 0 10px 0;
padding: 10px;
}
pre:not([class]) {
background-color: white;
border: #f5f5f5 1px solid;
}
pre:not([class]) code {
color: #444;
background-color: white;
}
code {
font-family: monospace;
font-size: 90%;
}
p > code, li > code {
padding: 2px 4px;
color: #d14;
border: 1px solid #e1e1e8;
white-space: inherit;
}
div.figure {
text-align: center;
}
table > caption, div.figure p.caption {
font-style: italic;
}
table > caption span, div.figure p.caption span {
font-style: normal;
font-weight: bold;
}
p {
margin: 0 0 10px;
}
table:not([class]) {
margin: auto auto 10px auto;
}
img:not([class]) {
background-color: #FFFFFF;
padding: 2px;
border-radius: 3px;
border: 1px solid #CCCCCC;
margin: 0 5px;
max-width: 100%;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f5f5f5;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f5f5f5;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f5f5f5;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
/* Underline links whose href starts with "http:" or "https:". */
a[href^="http:"],
a[href^="https:"] {
  text-decoration: underline;
}
div.r-help-page {
background-color: #f9f9f9;
border-bottom: #ddd 1px solid;
margin-bottom: 10px;
padding: 10px;
}
div.r-help-page:hover {
background-color: #f4f4f4;
}
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">An Introduction to xfun</h1>
<h3 class="subtitle">A Collection of Miscellaneous Functions</h3>
<h4 class="author">Yihui Xie</h4>
<h4 class="date">2023-11-01</h4>
<div id="TOC">
<ul>
<li><a href="#no-more-partial-matching-for-lists" id="toc-no-more-partial-matching-for-lists">No more partial matching for
lists!</a></li>
<li><a href="#output-character-vectors-for-human-eyes" id="toc-output-character-vectors-for-human-eyes">Output character
vectors for human eyes</a></li>
<li><a href="#print-the-content-of-a-text-file" id="toc-print-the-content-of-a-text-file">Print the content of a text
file</a></li>
<li><a href="#get-the-data-uri-of-a-file" id="toc-get-the-data-uri-of-a-file">Get the data URI of a file</a></li>
<li><a href="#match-strings-and-do-substitutions" id="toc-match-strings-and-do-substitutions">Match strings and do
substitutions</a></li>
<li><a href="#search-and-replace-strings-in-files" id="toc-search-and-replace-strings-in-files">Search and replace strings
in files</a></li>
<li><a href="#manipulate-filename-extensions" id="toc-manipulate-filename-extensions">Manipulate filename
extensions</a></li>
<li><a href="#find-files-in-a-project-without-the-pain-of-thinking-about-absoluterelative-paths" id="toc-find-files-in-a-project-without-the-pain-of-thinking-about-absoluterelative-paths">Find
files (in a project) without the pain of thinking about
absolute/relative paths</a></li>
<li><a href="#types-of-operating-systems" id="toc-types-of-operating-systems">Types of operating systems</a></li>
<li><a href="#loading-and-attaching-packages" id="toc-loading-and-attaching-packages">Loading and attaching
packages</a></li>
<li><a href="#readwrite-files-in-utf-8" id="toc-readwrite-files-in-utf-8">Read/write files in UTF-8</a></li>
<li><a href="#convert-numbers-to-english-words" id="toc-convert-numbers-to-english-words">Convert numbers to English
words</a></li>
<li><a href="#cache-an-r-expression-to-an-rds-file" id="toc-cache-an-r-expression-to-an-rds-file">Cache an R expression to
an RDS file</a></li>
<li><a href="#check-reverse-dependencies-of-a-package" id="toc-check-reverse-dependencies-of-a-package">Check reverse
dependencies of a package</a></li>
<li><a href="#input-a-character-vector-into-the-rstudio-source-editor" id="toc-input-a-character-vector-into-the-rstudio-source-editor">Input a
character vector into the RStudio source editor</a></li>
<li><a href="#print-session-information" id="toc-print-session-information">Print session information</a></li>
</ul>
</div>
<p>After writing about 20 R packages, I found I had accumulated several
utility functions that I used across different packages, so I decided to
extract them into a separate package. Previously I had been using the
evil triple-colon <code>:::</code> to access these internal utility
functions. Now with <strong>xfun</strong>, these functions have been
exported, and more importantly, documented. It should be better to use
them under the sun instead of in the dark.</p>
<p>This page shows examples of a subset of functions in this package.
For a full list of functions, see the help page
<code>help(package = 'xfun')</code>. The source package is available on
GitHub: <a href="https://github.com/yihui/xfun" class="uri">https://github.com/yihui/xfun</a>.</p>
<div id="no-more-partial-matching-for-lists" class="section level2">
<h2>No more partial matching for lists!</h2>
<p>I have been bitten many times by partial matching in lists, e.g.,
when I want <code>x$a</code> but the element <code>a</code> does not
exist in the list <code>x</code>, it returns the value
<code>x$abc</code> if <code>abc</code> exists in <code>x</code>. A
strict list is a list for which the partial matching of the
<code>$</code> operator is disabled. The functions
<code>xfun::strict_list()</code> and <code>xfun::as_strict_list()</code>
are the equivalents to <code>base::list()</code> and
<code>base::as.list()</code> respectively which always return a strict
list, e.g.,</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(xfun)</span>
<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a>(<span class="at">z =</span> <span class="fu">strict_list</span>(<span class="at">aaa =</span> <span class="st">"I am aaa"</span>, <span class="at">b =</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>))</span></code></pre></div>
<pre><code>## $aaa
## [1] "I am aaa"
##
## $b
## [1] 1 2 3 4 5</code></pre>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>z<span class="sc">$</span>a <span class="co"># NULL (strict matching)</span></span></code></pre></div>
<pre><code>## NULL</code></pre>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>z<span class="sc">$</span>aaa <span class="co"># I am aaa</span></span></code></pre></div>
<pre><code>## [1] "I am aaa"</code></pre>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>z<span class="sc">$</span>b</span></code></pre></div>
<pre><code>## [1] 1 2 3 4 5</code></pre>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>z<span class="sc">$</span>c <span class="ot">=</span> <span class="st">"you can create a new element"</span></span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a></span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a>z2 <span class="ot">=</span> <span class="fu">unclass</span>(z) <span class="co"># a normal list</span></span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a>z2<span class="sc">$</span>a <span class="co"># partial matching</span></span></code></pre></div>
<pre><code>## [1] "I am aaa"</code></pre>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>z3 <span class="ot">=</span> <span class="fu">as_strict_list</span>(z2) <span class="co"># a strict list again</span></span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a>z3<span class="sc">$</span>a <span class="co"># NULL (strict matching) again!</span></span></code></pre></div>
<pre><code>## NULL</code></pre>
<p>Similarly, the default partial matching in <code>attr()</code> can be
annoying, too. The function <code>xfun::attr()</code> is simply a
shorthand of <code>attr(..., exact = TRUE)</code>.</p>
<p>I want it, or I do not want. There is no “I probably want”.</p>
</div>
<div id="output-character-vectors-for-human-eyes" class="section level2">
<h2>Output character vectors for human eyes</h2>
<p>When R prints a character vector, your eyes may be distracted by the
indices like <code>[1]</code>, double quotes, and escape sequences. To
see a character vector in its “raw” form, you can use
<code>cat(..., sep = '\n')</code>. The function
<code>raw_string()</code> marks a character vector as “raw”, and the
corresponding printing function will call <code>cat(sep = '\n')</code>
to print the character vector to the console.</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="fu">library</span>(xfun)</span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a><span class="fu">raw_string</span>(<span class="fu">head</span>(LETTERS))</span></code></pre></div>
<pre><code>A
B
C
D
E
F</code></pre>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>(<span class="at">x =</span> <span class="fu">c</span>(<span class="st">"a </span><span class="sc">\"</span><span class="st">b</span><span class="sc">\"</span><span class="st">"</span>, <span class="st">"hello</span><span class="sc">\t</span><span class="st">world!"</span>))</span></code></pre></div>
<pre><code>[1] "a \"b\"" "hello\tworld!"</code></pre>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="fu">raw_string</span>(x) <span class="co"># this is more likely to be what you want to see</span></span></code></pre></div>
<pre><code>a "b"
hello world!</code></pre>
</div>
<div id="print-the-content-of-a-text-file" class="section level2">
<h2>Print the content of a text file</h2>
<p>I have used <code>paste(readLines('foo'), collapse = '\n')</code>
many times before I decided to write a simple wrapper function
<code>xfun::file_string()</code>. This function also makes use of
<code>raw_string()</code>, so you can see the content of a file in the
console as a side-effect, e.g.,</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a>f <span class="ot">=</span> <span class="fu">system.file</span>(<span class="st">"LICENSE"</span>, <span class="at">package =</span> <span class="st">"xfun"</span>)</span>
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">file_string</span>(f)</span></code></pre></div>
<pre><code>YEAR: 2018-2023
COPYRIGHT HOLDER: Yihui Xie</code></pre>
<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" tabindex="-1"></a><span class="fu">as.character</span>(xfun<span class="sc">::</span><span class="fu">file_string</span>(f)) <span class="co"># essentially a character string</span></span></code></pre></div>
<pre><code>[1] "YEAR: 2018-2023\nCOPYRIGHT HOLDER: Yihui Xie"</code></pre>
</div>
<div id="get-the-data-uri-of-a-file" class="section level2">
<h2>Get the data URI of a file</h2>
<p>Files can be encoded into base64 strings via
<code>base64_uri()</code>. This is a common technique to embed arbitrary
files in HTML documents (which is <a href="https://bookdown.org/yihui/rmarkdown-cookbook/embed-file.html">what
<code>xfun::embed_file()</code> does</a> and it is based on
<code>base64_uri()</code>).</p>
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" tabindex="-1"></a>f <span class="ot">=</span> <span class="fu">system.file</span>(<span class="st">"LICENSE"</span>, <span class="at">package =</span> <span class="st">"xfun"</span>)</span>
<span id="cb23-2"><a href="#cb23-2" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">base64_uri</span>(f)</span></code></pre></div>
<pre><code>## [1] "data:text/plain;base64,WUVBUjogMjAxOC0yMDIzCkNPUFlSSUdIVCBIT0xERVI6IFlpaHVpIFhpZQo="</code></pre>
</div>
<div id="match-strings-and-do-substitutions" class="section level2">
<h2>Match strings and do substitutions</h2>
<p>After typing the code
<code>x = grep(pattern, x, value = TRUE); gsub(pattern, '\\1', x)</code>
many times, I combined them into a single function
<code>xfun::grep_sub()</code>.</p>
<div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">grep_sub</span>(<span class="st">'a([b]+)c'</span>, <span class="st">'a</span><span class="sc">\\</span><span class="st">U</span><span class="sc">\\</span><span class="st">1c'</span>, <span class="fu">c</span>(<span class="st">'abc'</span>, <span class="st">'abbbc'</span>, <span class="st">'addc'</span>, <span class="st">'123'</span>), <span class="at">perl =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<pre><code>## [1] "aBc" "aBBBc"</code></pre>
</div>
<div id="search-and-replace-strings-in-files" class="section level2">
<h2>Search and replace strings in files</h2>
<p>I can never remember how to properly use <code>grep</code> or
<code>sed</code> to search and replace strings in multiple files. My
favorite IDE, RStudio, has not provided this feature yet (you can only
search and replace in the currently opened file). Therefore I did a
quick and dirty implementation in R, including functions
<code>gsub_files()</code>, <code>gsub_dir()</code>, and
<code>gsub_ext()</code>, to search and replace strings in multiple files
under a directory. Note that the files are assumed to be encoded in
UTF-8. If you do not use UTF-8, we cannot be friends. Seriously.</p>
<p>All functions are based on <code>gsub_file()</code>, which performs
searching and replacing in a single file, e.g.,</p>
<div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" tabindex="-1"></a><span class="fu">library</span>(xfun)</span>
<span id="cb27-2"><a href="#cb27-2" tabindex="-1"></a>f <span class="ot">=</span> <span class="fu">tempfile</span>()</span>
<span id="cb27-3"><a href="#cb27-3" tabindex="-1"></a><span class="fu">writeLines</span>(<span class="fu">c</span>(<span class="st">"hello"</span>, <span class="st">"world"</span>), f)</span>
<span id="cb27-4"><a href="#cb27-4" tabindex="-1"></a><span class="fu">gsub_file</span>(f, <span class="st">"world"</span>, <span class="st">"woRld"</span>, <span class="at">fixed =</span> <span class="cn">TRUE</span>)</span>
<span id="cb27-5"><a href="#cb27-5" tabindex="-1"></a><span class="fu">file_string</span>(f)</span></code></pre></div>
<pre><code>hello
woRld</code></pre>
<p>The function <code>gsub_dir()</code> is very flexible: you can limit
the list of files by MIME types, or extensions. For example, if you want
to do substitution in text files, you may use
<code>gsub_dir(..., mimetype = '^text/')</code>.</p>
<p>The function <code>process_file()</code> is a more general way to
process files. Basically it reads a file, processes the content with a
function that you pass to it, and writes back the text, e.g.,</p>
<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" tabindex="-1"></a><span class="fu">process_file</span>(f, <span class="cf">function</span>(x) {</span>
<span id="cb29-2"><a href="#cb29-2" tabindex="-1"></a> <span class="fu">rep</span>(x, <span class="dv">3</span>) <span class="co"># repeat the content 3 times</span></span>
<span id="cb29-3"><a href="#cb29-3" tabindex="-1"></a>})</span>
<span id="cb29-4"><a href="#cb29-4" tabindex="-1"></a><span class="fu">file_string</span>(f)</span></code></pre></div>
<pre><code>hello
woRld
hello
woRld
hello
woRld</code></pre>
<p><strong>WARNING</strong>: Before using these functions, make sure
that you have backed up your files, or version control your files. The
files will be modified in-place. If you do not back up or use version
control, there is no chance to regret.</p>
</div>
<div id="manipulate-filename-extensions" class="section level2">
<h2>Manipulate filename extensions</h2>
<p>Functions <code>file_ext()</code> and <code>sans_ext()</code> are
based on functions in <strong>tools</strong>. The function
<code>with_ext()</code> adds or replaces extensions of filenames, and it
is vectorized.</p>
<div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" tabindex="-1"></a><span class="fu">library</span>(xfun)</span>
<span id="cb31-2"><a href="#cb31-2" tabindex="-1"></a>p <span class="ot">=</span> <span class="fu">c</span>(<span class="st">"abc.doc"</span>, <span class="st">"def123.tex"</span>, <span class="st">"path/to/foo.Rmd"</span>)</span>
<span id="cb31-3"><a href="#cb31-3" tabindex="-1"></a><span class="fu">file_ext</span>(p)</span></code></pre></div>
<pre><code>## [1] "doc" "tex" "Rmd"</code></pre>
<div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" tabindex="-1"></a><span class="fu">sans_ext</span>(p)</span></code></pre></div>
<pre><code>## [1] "abc" "def123" "path/to/foo"</code></pre>
<div class="sourceCode" id="cb35"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" tabindex="-1"></a><span class="fu">with_ext</span>(p, <span class="st">".txt"</span>)</span></code></pre></div>
<pre><code>## [1] "abc.txt" "def123.txt" "path/to/foo.txt"</code></pre>
<div class="sourceCode" id="cb37"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1" tabindex="-1"></a><span class="fu">with_ext</span>(p, <span class="fu">c</span>(<span class="st">".ppt"</span>, <span class="st">".sty"</span>, <span class="st">".Rnw"</span>))</span></code></pre></div>
<pre><code>## [1] "abc.ppt" "def123.sty" "path/to/foo.Rnw"</code></pre>
<div class="sourceCode" id="cb39"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1" tabindex="-1"></a><span class="fu">with_ext</span>(p, <span class="st">"html"</span>)</span></code></pre></div>
<pre><code>## [1] "abc.html" "def123.html" "path/to/foo.html"</code></pre>
</div>
<div id="find-files-in-a-project-without-the-pain-of-thinking-about-absoluterelative-paths" class="section level2">
<h2>Find files (in a project) without the pain of thinking about
absolute/relative paths</h2>
<p>The function <code>proj_root()</code> was inspired by the
<strong>rprojroot</strong> package, and tries to find the root directory
of a project. Currently it only supports R package projects and RStudio
projects by default. It is much less sophisticated than
<strong>rprojroot</strong>.</p>
<p>The function <code>from_root()</code> was inspired by
<code>here::here()</code>, but returns a relative path (relative to the
project’s root directory found by <code>proj_root()</code>) instead of
an absolute path. For example,
<code>xfun::from_root('data', 'cars.csv')</code> in a code chunk of
<code>docs/foo.Rmd</code> will return <code>../data/cars.csv</code> when
<code>docs/</code> and <code>data/</code> directories are under the root
directory of a project.</p>
<pre><code>root/
  |-- data/
  |    |-- cars.csv
  |
  |-- docs/
       |-- foo.Rmd</code></pre>
<p>If file paths are too much pain for you to think about, you can just
pass an incomplete path to the function <code>magic_path()</code>, and
it will try to find the actual path recursively under subdirectories of
a root directory. For example, you may only provide a base filename, and
<code>magic_path()</code> will look for this file under subdirectories
and return the actual path if it is found. By default, it returns a
relative path, which is relative to the current working directory. With
the above example, <code>xfun::magic_path('cars.csv')</code> in a code
chunk of <code>docs/foo.Rmd</code> will return
<code>../data/cars.csv</code>, if <code>cars.csv</code> is a unique
filename in the project. You can freely move it to any folders of this
project, and <code>magic_path()</code> will still find it. If you are
not using a project to manage files, <code>magic_path()</code> will look
for the file under subdirectories of the current working directory.</p>
</div>
<div id="types-of-operating-systems" class="section level2">
<h2>Types of operating systems</h2>
<p>The series of functions <code>is_linux()</code>,
<code>is_macos()</code>, <code>is_unix()</code>, and
<code>is_windows()</code> test the types of the OS, using the
information from <code>.Platform</code> and <code>Sys.info()</code>,
e.g.,</p>
<div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">is_macos</span>()</span></code></pre></div>
<pre><code>## [1] TRUE</code></pre>
<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">is_unix</span>()</span></code></pre></div>
<pre><code>## [1] TRUE</code></pre>
<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">is_linux</span>()</span></code></pre></div>
<pre><code>## [1] FALSE</code></pre>
<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">is_windows</span>()</span></code></pre></div>
<pre><code>## [1] FALSE</code></pre>
</div>
<div id="loading-and-attaching-packages" class="section level2">
<h2>Loading and attaching packages</h2>
<p>Oftentimes I see users attach a series of packages in the beginning
of their scripts by repeating <code>library()</code> multiple times.
This could be easily vectorized, and the function
<code>xfun::pkg_attach()</code> does this job. For example,</p>
<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" tabindex="-1"></a><span class="fu">library</span>(testit)</span>
<span id="cb50-2"><a href="#cb50-2" tabindex="-1"></a><span class="fu">library</span>(parallel)</span>
<span id="cb50-3"><a href="#cb50-3" tabindex="-1"></a><span class="fu">library</span>(tinytex)</span>
<span id="cb50-4"><a href="#cb50-4" tabindex="-1"></a><span class="fu">library</span>(mime)</span></code></pre></div>
<p>is equivalent to</p>
<div class="sourceCode" id="cb51"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">pkg_attach</span>(<span class="fu">c</span>(<span class="st">'testit'</span>, <span class="st">'parallel'</span>, <span class="st">'tinytex'</span>, <span class="st">'mime'</span>))</span></code></pre></div>
<p>I also see scripts that contain code to install a package if it is
not available, e.g.,</p>
<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" tabindex="-1"></a><span class="cf">if</span> (<span class="sc">!</span><span class="fu">requireNamespace</span>(<span class="st">'tinytex'</span>)) <span class="fu">install.packages</span>(<span class="st">'tinytex'</span>)</span>
<span id="cb52-2"><a href="#cb52-2" tabindex="-1"></a><span class="fu">library</span>(tinytex)</span></code></pre></div>
<p>This could be done via</p>
<div class="sourceCode" id="cb53"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">pkg_attach2</span>(<span class="st">'tinytex'</span>)</span></code></pre></div>
<p>The function <code>pkg_attach2()</code> is a shorthand of
<code>pkg_attach(..., install = TRUE)</code>, which means if a package
is not available, install it. This function can also deal with multiple
packages.</p>
<p>The function <code>loadable()</code> tests if a package is
loadable.</p>
</div>
<div id="readwrite-files-in-utf-8" class="section level2">
<h2>Read/write files in UTF-8</h2>
<p>Functions <code>read_utf8()</code> and <code>write_utf8()</code> can
be used to read/write files in UTF-8. They are simple wrappers of
<code>readLines()</code> and <code>writeLines()</code>.</p>
</div>
<div id="convert-numbers-to-english-words" class="section level2">
<h2>Convert numbers to English words</h2>
<p>The function <code>numbers_to_words()</code> (or <code>n2w()</code>
for short) converts numbers to English words.</p>
<div class="sourceCode" id="cb54"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="dv">0</span>, <span class="at">cap =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<pre><code>## [1] "Zero"</code></pre>
<div class="sourceCode" id="cb56"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="fu">seq</span>(<span class="dv">0</span>, <span class="dv">121</span>, <span class="dv">11</span>), <span class="at">and =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<pre><code>## [1] "zero" "eleven"
## [3] "twenty-two" "thirty-three"
## [5] "forty-four" "fifty-five"
## [7] "sixty-six" "seventy-seven"
## [9] "eighty-eight" "ninety-nine"
## [11] "one hundred and ten" "one hundred and twenty-one"</code></pre>
<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="fl">1e+06</span>)</span></code></pre></div>
<pre><code>## [1] "one million"</code></pre>
<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="#cb60-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="fl">1e+11</span> <span class="sc">+</span> <span class="dv">12345678</span>)</span></code></pre></div>
<pre><code>## [1] "one hundred billion, twelve million, three hundred forty-five thousand, six hundred seventy-eight"</code></pre>
<div class="sourceCode" id="cb62"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb62-1"><a href="#cb62-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="sc">-</span><span class="dv">987654321</span>)</span></code></pre></div>
<pre><code>## [1] "minus nine hundred eighty-seven million, six hundred fifty-four thousand, three hundred twenty-one"</code></pre>
<div class="sourceCode" id="cb64"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb64-1"><a href="#cb64-1" tabindex="-1"></a><span class="fu">n2w</span>(<span class="fl">1e+15</span> <span class="sc">-</span> <span class="dv">1</span>)</span></code></pre></div>
<pre><code>## [1] "nine hundred ninety-nine trillion, nine hundred ninety-nine billion, nine hundred ninety-nine million, nine hundred ninety-nine thousand, nine hundred ninety-nine"</code></pre>
</div>
<div id="cache-an-r-expression-to-an-rds-file" class="section level2">
<h2>Cache an R expression to an RDS file</h2>
<p>The function <code>cache_rds()</code> provides a simple caching
mechanism: the first time an expression is passed to it, it saves the
result to an RDS file; the next time it will read the RDS file and
return the value instead of evaluating the expression again. If you want
to invalidate the cache, you can use the argument
<code>rerun = TRUE</code>.</p>
<div class="sourceCode" id="cb66"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb66-1"><a href="#cb66-1" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb66-2"><a href="#cb66-2" tabindex="-1"></a> <span class="co"># pretend the computing here is time-consuming</span></span>
<span id="cb66-3"><a href="#cb66-3" tabindex="-1"></a> <span class="fu">Sys.sleep</span>(<span class="dv">2</span>)</span>
<span id="cb66-4"><a href="#cb66-4" tabindex="-1"></a> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span></span>
<span id="cb66-5"><a href="#cb66-5" tabindex="-1"></a>})</span></code></pre></div>
<p>When the function is used in a code chunk in a <strong>knitr</strong>
document, the RDS cache file is saved to a path determined by the chunk
label (the base filename) and the chunk option <code>cache.path</code>
(the cache directory), so you do not have to provide the
<code>file</code> and <code>dir</code> arguments of
<code>cache_rds()</code>.</p>
<p>This caching mechanism is much simpler than <strong>knitr</strong>’s
caching. Cache invalidation is often tricky (see <a href="https://yihui.org/en/2018/06/cache-invalidation/">this post</a>),
so this function may be helpful if you want more transparency and
control over when to invalidate the cache (for <code>cache_rds()</code>,
the cache is invalidated when the cache file is deleted, which can be
achieved via the argument <code>rerun = TRUE</code>).</p>
<p>As documented on the help page of <code>cache_rds()</code>, there are
two common cases in which you may want to invalidate the cache:</p>
<ol style="list-style-type: decimal">
<li><p>The code in the expression has changed, e.g., if you changed the
code from <code>cache_rds({x + 1})</code> to
<code>cache_rds({x + 2})</code>, the cache will be automatically
invalidated and the expression will be re-evaluated. However, please
note that changes in whitespace or comments do not matter. Or
generally speaking, as long as the change does not affect the parsed
expression, the cache will not be invalidated, e.g., the two expressions
below are essentially identical (hence if you have executed
<code>cache_rds()</code> on the first expression, the second expression
will be able to take advantage of the cache):</p>
<div class="sourceCode" id="cb67"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb67-1"><a href="#cb67-1" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb67-2"><a href="#cb67-2" tabindex="-1"></a> <span class="fu">Sys.sleep</span>(<span class="dv">3</span> );</span>
<span id="cb67-3"><a href="#cb67-3" tabindex="-1"></a> x<span class="ot">=</span><span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>; <span class="co"># semi-colons won't matter</span></span>
<span id="cb67-4"><a href="#cb67-4" tabindex="-1"></a> x<span class="sc">+</span><span class="dv">1</span>;</span>
<span id="cb67-5"><a href="#cb67-5" tabindex="-1"></a>})</span>
<span id="cb67-6"><a href="#cb67-6" tabindex="-1"></a></span>
<span id="cb67-7"><a href="#cb67-7" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb67-8"><a href="#cb67-8" tabindex="-1"></a> <span class="fu">Sys.sleep</span>(<span class="dv">3</span>)</span>
<span id="cb67-9"><a href="#cb67-9" tabindex="-1"></a> x <span class="ot">=</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span> <span class="co"># a comment</span></span>
<span id="cb67-10"><a href="#cb67-10" tabindex="-1"></a> x <span class="sc">+</span></span>
<span id="cb67-11"><a href="#cb67-11" tabindex="-1"></a> <span class="dv">1</span> <span class="co"># feel free to make any changes in white spaces</span></span>
<span id="cb67-12"><a href="#cb67-12" tabindex="-1"></a>})</span></code></pre></div></li>
<li><p>The value of a global variable in the expression has changed,
e.g., if <code>y</code> has changed, you are most likely to want to
invalidate the cache and rerun the expression below:</p>
<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb68-2"><a href="#cb68-2" tabindex="-1"></a> x <span class="ot">=</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span></span>
<span id="cb68-3"><a href="#cb68-3" tabindex="-1"></a> x <span class="sc">+</span> y</span>
<span id="cb68-4"><a href="#cb68-4" tabindex="-1"></a>})</span></code></pre></div>
<p>This is because <code>x</code> is a local variable in the expression,
and <code>y</code> is an external global variable (not created locally
like <code>x</code>). To invalidate the cache when <code>y</code> has
changed, you may let <code>cache_rds()</code> know through the
<code>hash</code> argument that <code>y</code> needs to be considered
when deciding if the cache should be invalidated:</p>
<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb69-2"><a href="#cb69-2" tabindex="-1"></a> x <span class="ot">=</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span></span>
<span id="cb69-3"><a href="#cb69-3" tabindex="-1"></a> x <span class="sc">+</span> y</span>
<span id="cb69-4"><a href="#cb69-4" tabindex="-1"></a>}, <span class="at">hash =</span> <span class="fu">list</span>(y))</span></code></pre></div>
<p>If you do not want to provide this list of value(s) to the
<code>hash</code> argument, you may try <code>hash = "auto"</code>
instead, which asks <code>cache_rds()</code> to try to figure out all
global variables automatically and use a list of their values as the
value for the <code>hash</code> argument.</p>
<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="#cb70-1" tabindex="-1"></a>res <span class="ot">=</span> xfun<span class="sc">::</span><span class="fu">cache_rds</span>({</span>
<span id="cb70-2"><a href="#cb70-2" tabindex="-1"></a> x <span class="ot">=</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span></span>
<span id="cb70-3"><a href="#cb70-3" tabindex="-1"></a> x <span class="sc">+</span> y</span>
<span id="cb70-4"><a href="#cb70-4" tabindex="-1"></a>}, <span class="at">hash =</span> <span class="st">"auto"</span>)</span></code></pre></div></li>
</ol>
</div>
<div id="check-reverse-dependencies-of-a-package" class="section level2">
<h2>Check reverse dependencies of a package</h2>
<p>Running <code>R CMD check</code> on the reverse dependencies of
<strong>knitr</strong> and <strong>rmarkdown</strong> is my least
favorite thing in developing R packages, because the number of their
reverse dependencies is huge. The function <code>rev_check()</code>
reflects some of my past experience in this process. I think I have
automated it as much as possible, and made it as easy as possible to
discover possible new problems introduced by the current version of the
package (compared to the CRAN version). Finally I can just sit back and
let it run.</p>
</div>
<div id="input-a-character-vector-into-the-rstudio-source-editor" class="section level2">
<h2>Input a character vector into the RStudio source editor</h2>
<p>The function <code>rstudio_type()</code> inputs characters in the
RStudio source editor as if they were typed by a human. I came up with
the idea when preparing my talk for rstudio::conf 2018 (<a href="https://yihui.org/en/2018/03/blogdown-video-rstudio-conf/">see
this post</a> for more details).</p>
</div>
<div id="print-session-information" class="section level2">
<h2>Print session information</h2>
<p>Since I have never been fully satisfied with the output of
<code>sessionInfo()</code>, I tweaked it to make it more useful in my
use cases. For example, it is rarely useful to print out the names of
base R packages, or information about the matrix products / BLAS /
LAPACK. Oftentimes I want additional information in the session
information, such as the Pandoc version when <strong>rmarkdown</strong>
is used. The function <code>session_info()</code> tweaks the output of
<code>sessionInfo()</code>, and makes it possible for other packages to
append information in the output of <code>session_info()</code>.</p>
<p>You can choose to print out the versions of only the packages you
specify, e.g.,</p>
<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="#cb71-1" tabindex="-1"></a>xfun<span class="sc">::</span><span class="fu">session_info</span>(<span class="fu">c</span>(<span class="st">'xfun'</span>, <span class="st">'rmarkdown'</span>, <span class="st">'knitr'</span>, <span class="st">'tinytex'</span>), <span class="at">dependencies =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.5.2
##
## Locale: C / en_US.UTF-8 / en_US.UTF-8 / C / en_US.UTF-8 / en_US.UTF-8
##
## Package version:
## knitr_1.45.2 rmarkdown_2.25 tinytex_0.48.3 xfun_0.41
##
## Pandoc version: 3.1.1
##
## LaTeX version used:
## TeX Live 2023 (TinyTeX) with tlmgr 2023-04-08</code></pre>
</div>
<script type="text/javascript">
// Prefix figure and table captions with running numbers ("Figure N:" /
// "Table N:") once the page has finished loading.
//
// The handler is registered with addEventListener rather than assigned to
// window.onload, so it neither overwrites nor is overwritten by any other
// load handler installed on the page.
window.addEventListener('load', function() {
  var i, cap, fig = 1, caps = document.getElementsByClassName('caption');

  // Figure captions: only <p class="caption"> elements whose parent's class
  // attribute is exactly "figure" are numbered.
  for (i = 0; i < caps.length; i++) {
    cap = caps[i];
    if (cap.parentElement.className !== 'figure' || cap.nodeName !== 'P')
      continue;
    cap.innerHTML = '<span>Figure ' + fig + ':</span> ' + cap.innerHTML;
    fig++;
  }

  // Table captions: <caption> elements that are direct children of a <table>.
  // The counter is reused, so table numbering restarts at 1.
  fig = 1;
  caps = document.getElementsByTagName('caption');
  for (i = 0; i < caps.length; i++) {
    cap = caps[i];
    if (cap.parentElement.nodeName !== 'TABLE') continue;
    cap.innerHTML = '<span>Table ' + fig + ':</span> ' + cap.innerHTML;
    fig++;
  }
});
</script>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Inject the MathJax loader at runtime by appending a <script> element to the
// document's first <head>, instead of referencing it statically in the markup.
(function () {
  var headEl = document.getElementsByTagName("head")[0];
  var mathjaxTag = document.createElement("script");
  mathjaxTag.type = "text/javascript";
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  headEl.appendChild(mathjaxTag);
})();
</script>
</body>
</html>