UNPKG

skimr

Version:

CLI EDA for CSVs

699 lines (670 loc) 74.6 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Get started with vroom</title>
<!--
  Header clean-up script.
  Block comments are used inside the scripts (instead of line comments) so the
  code keeps working even if the document's line breaks are collapsed.
-->
<script>
/* Pandoc 2.9 adds attributes on both header and div. We remove the former (to
   be compatible with the behavior of Pandoc < 2.8). */
document.addEventListener('DOMContentLoaded', function(e) {
  /* First child of every div.section whose class mentions a "level". */
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; /* it should be a header h1-h6 */
    a = h.attributes;
    /* h.attributes is live: removing the first entry shifts the rest down,
       so this loop ends once every attribute has been stripped. */
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code { white-space: pre; }
.sourceCode { overflow: visible; }
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code { counter-reset: source-line 0; }
pre.numberSource code > span { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before { content: counter(source-line); position: relative; left: -1em; text-align: right; vertical-align: baseline; border: none; display: inline-block; -webkit-touch-callout: none; -webkit-user-select: none; -khtml-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; padding: 0 4px; width: 4em; color: #aaaaaa; }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode { }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
/* apply pandoc div.sourceCode style to pre.sourceCode instead */
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    /* Only touch the style sheet tagged data-origin="pandoc". */
    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
    /* Skip sheets whose rule list cannot be read (the access may throw). */
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      /* check if there is a div.sourceCode rule */
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      /* check if color or background-color is set */
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      /* replace div.sourceCode by a pre.sourceCode rule; j is deliberately not
         advanced here: the inserted rule no longer matches "div.sourceCode",
         so the next iteration steps past it. */
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
body { background-color: #fff; margin: 1em auto; max-width: 700px; overflow: visible; padding-left: 2em; padding-right: 2em; font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; font-size: 14px; line-height: 1.35; }
#TOC { clear: both; margin: 0 0 10px 10px; padding: 4px; width: 400px; border: 1px solid #CCCCCC; border-radius: 5px; background-color: #f6f6f6; font-size: 13px; line-height: 1.3; }
#TOC .toctitle { font-weight: bold; font-size: 15px; margin-left: 5px; }
#TOC ul { padding-left: 40px; margin-left: -1.5em; margin-top: 5px; margin-bottom: 5px; }
#TOC ul ul { margin-left: -2em; }
#TOC li { line-height: 16px; }
table { margin: 1em auto; border-width: 1px; border-color: #DDDDDD; border-style: outset; border-collapse: collapse; }
table th { border-width: 2px; padding: 5px; border-style: inset; }
table td { border-width: 1px; border-style: inset; line-height: 18px; padding: 5px 5px; }
table, table th, table td { border-left-style: none; border-right-style: none; }
table thead, table tr.even { background-color: #f7f7f7; }
p { margin: 0.5em 0; }
blockquote { background-color: #f6f6f6; padding: 0.25em 0.75em; }
hr { border-style: solid; border: none; border-top: 1px solid #777; margin: 28px 0; }
dl { margin-left: 0; }
dl dd { margin-bottom: 13px; margin-left: 13px; }
dl dt { font-weight: bold; }
ul { margin-top: 0; }
ul li { list-style: circle outside; }
ul ul { margin-bottom: 0; }
pre, code { background-color: #f7f7f7; border-radius: 3px; color: #333; white-space: pre-wrap; }
pre { border-radius: 3px; margin: 5px 0px 10px 0px; padding: 10px; }
pre:not([class]) { background-color: #f7f7f7; }
code { font-family: Consolas, Monaco, 'Courier New', monospace; font-size: 85%; }
p > code, li > code { padding: 2px 0px; }
div.figure { text-align: center; }
img { background-color: #FFFFFF; padding: 2px; border: 1px solid #DDDDDD; border-radius: 3px; border: 1px solid #CCCCCC; margin: 0 5px; }
h1 { margin-top: 0; font-size: 35px; line-height: 40px; }
h2 { border-bottom: 4px solid #f7f7f7; padding-top: 10px; padding-bottom: 2px; font-size: 145%; }
h3 { border-bottom: 2px solid #f7f7f7; padding-top: 10px; font-size: 120%; }
h4 { border-bottom: 1px solid #f7f7f7; margin-left: 8px; font-size: 105%; }
h5, h6 { border-bottom: 1px solid #ccc; font-size: 105%; }
a { color: #0033dd; text-decoration: none; }
a:hover { color: #6666ff; }
a:visited { color: #800080; }
a:visited:hover { color: #BB00BB; }
a[href^="http:"] { text-decoration: underline; }
a[href^="https:"] { text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Get started with vroom</h1>
<p>The vroom package contains one main function <code>vroom()</code> which is used to read all types of delimited files.
A delimited file is any file in which the data is separated (delimited) by one or more characters.</p> <p>The most common types of delimited files are CSV (Comma Separated Values) and TSV (Tab Separated Values) files; these typically have a <code>.csv</code> or <code>.tsv</code> suffix, respectively.</p> <div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(vroom)</span></code></pre></div> <p>This vignette covers the following topics:</p> <ul> <li>The basics of reading files, including <ul> <li>single files</li> <li>multiple files</li> <li>compressed files</li> <li>remote files</li> </ul></li> <li>Skipping particular columns.</li> <li>Specifying column types, for additional safety and when the automatic guessing fails.</li> <li>Writing regular and compressed files</li> </ul> <div id="reading-files" class="section level2"> <h2>Reading files</h2> <p>To read a CSV, or other type of delimited file, with vroom, pass the file to <code>vroom()</code>. The delimiter will be automatically guessed if it is a common delimiter; e.g. (“,” “\t” “ ” “|” “:” “;”). If the guessing fails or you are using a less common delimiter, specify it with the <code>delim</code> parameter (e.g. <code>delim = &quot;,&quot;</code>).</p> <p>We have included an example CSV file in the vroom package for use in examples and tests. 
Access it with <code>vroom_example(&quot;mtcars.csv&quot;)</code></p> <div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a><span class="co"># See where the example file is stored on your machine</span></span> <span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a>file <span class="ot">&lt;-</span> <span class="fu">vroom_example</span>(<span class="st">&quot;mtcars.csv&quot;</span>)</span> <span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a>file</span> <span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a><span class="co">#&gt; [1] &quot;/private/tmp/RtmpxOjBii/Rinst1277939526365/vroom/extdata/mtcars.csv&quot;</span></span> <span id="cb2-5"><a href="#cb2-5" tabindex="-1"></a></span> <span id="cb2-6"><a href="#cb2-6" tabindex="-1"></a><span class="co"># Read the file, by default vroom will guess the delimiter automatically.</span></span> <span id="cb2-7"><a href="#cb2-7" tabindex="-1"></a><span class="fu">vroom</span>(file)</span> <span id="cb2-8"><a href="#cb2-8" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 12</span></span> <span id="cb2-9"><a href="#cb2-9" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb2-10"><a href="#cb2-10" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb2-11"><a href="#cb2-11" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb2-12"><a href="#cb2-12" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb2-13"><a href="#cb2-13" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb2-14"><a href="#cb2-14" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb2-15"><a href="#cb2-15" tabindex="-1"></a><span 
class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb2-16"><a href="#cb2-16" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 12</span></span> <span id="cb2-17"><a href="#cb2-17" tabindex="-1"></a><span class="co">#&gt; model mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb2-18"><a href="#cb2-18" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb2-19"><a href="#cb2-19" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4</span></span> <span id="cb2-20"><a href="#cb2-20" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 W… 21 6 160 110 3.9 2.88 17.0 0 1 4 4</span></span> <span id="cb2-21"><a href="#cb2-21" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb2-22"><a href="#cb2-22" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span> <span id="cb2-23"><a href="#cb2-23" tabindex="-1"></a></span> <span id="cb2-24"><a href="#cb2-24" tabindex="-1"></a><span class="co"># You can also specify it explicitly, which is (slightly) faster, and safer if</span></span> <span id="cb2-25"><a href="#cb2-25" tabindex="-1"></a><span class="co"># you know how the file is delimited.</span></span> <span id="cb2-26"><a href="#cb2-26" tabindex="-1"></a><span class="fu">vroom</span>(file, <span class="at">delim =</span> <span class="st">&quot;,&quot;</span>)</span> <span id="cb2-27"><a href="#cb2-27" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 12</span></span> <span id="cb2-28"><a href="#cb2-28" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb2-29"><a href="#cb2-29" tabindex="-1"></a><span 
class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb2-30"><a href="#cb2-30" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb2-31"><a href="#cb2-31" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb2-32"><a href="#cb2-32" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb2-33"><a href="#cb2-33" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb2-34"><a href="#cb2-34" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb2-35"><a href="#cb2-35" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 12</span></span> <span id="cb2-36"><a href="#cb2-36" tabindex="-1"></a><span class="co">#&gt; model mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb2-37"><a href="#cb2-37" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb2-38"><a href="#cb2-38" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4</span></span> <span id="cb2-39"><a href="#cb2-39" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 W… 21 6 160 110 3.9 2.88 17.0 0 1 4 4</span></span> <span id="cb2-40"><a href="#cb2-40" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb2-41"><a href="#cb2-41" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> </div> <div id="reading-multiple-files" class="section level2"> <h2>Reading multiple files</h2> <p>If you are reading a set of files which all have the same columns (as in, names and types), you can pass the filenames 
directly to <code>vroom()</code> and it will combine them into one result. vroom’s example datasets include several files named like <code>mtcars-i.csv</code>. These files contain subsets of the <code>mtcars</code> data, for cars with different numbers of cylinders. First, we get a character vector of these filepaths.</p> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>ve <span class="ot">&lt;-</span> <span class="fu">grep</span>(<span class="st">&quot;mtcars-[0-9].csv&quot;</span>, <span class="fu">vroom_examples</span>(), <span class="at">value =</span> <span class="cn">TRUE</span>)</span> <span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a>files <span class="ot">&lt;-</span> <span class="fu">sapply</span>(ve, vroom_example)</span> <span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a>files</span> <span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="co">#&gt; mtcars-4.csv </span></span> <span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a><span class="co">#&gt; &quot;/private/tmp/RtmpxOjBii/Rinst1277939526365/vroom/extdata/mtcars-4.csv&quot; </span></span> <span id="cb3-6"><a href="#cb3-6" tabindex="-1"></a><span class="co">#&gt; mtcars-6.csv </span></span> <span id="cb3-7"><a href="#cb3-7" tabindex="-1"></a><span class="co">#&gt; &quot;/private/tmp/RtmpxOjBii/Rinst1277939526365/vroom/extdata/mtcars-6.csv&quot; </span></span> <span id="cb3-8"><a href="#cb3-8" tabindex="-1"></a><span class="co">#&gt; mtcars-8.csv </span></span> <span id="cb3-9"><a href="#cb3-9" tabindex="-1"></a><span class="co">#&gt; &quot;/private/tmp/RtmpxOjBii/Rinst1277939526365/vroom/extdata/mtcars-8.csv&quot;</span></span></code></pre></div> <p>Now we can efficiently read them into one table by passing the filenames directly to vroom.</p> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span 
class="fu">vroom</span>(files)</span> <span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 12</span></span> <span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb4-5"><a href="#cb4-5" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb4-6"><a href="#cb4-6" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb4-7"><a href="#cb4-7" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb4-8"><a href="#cb4-8" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb4-9"><a href="#cb4-9" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb4-10"><a href="#cb4-10" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 12</span></span> <span id="cb4-11"><a href="#cb4-11" tabindex="-1"></a><span class="co">#&gt; model mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb4-12"><a href="#cb4-12" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb4-13"><a href="#cb4-13" tabindex="-1"></a><span class="co">#&gt; 1 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb4-14"><a href="#cb4-14" tabindex="-1"></a><span class="co">#&gt; 2 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4 2</span></span> <span id="cb4-15"><a href="#cb4-15" tabindex="-1"></a><span class="co">#&gt; 3 Merc 230 22.8 4 141. 
95 3.92 3.15 22.9 1 0 4 2</span></span> <span id="cb4-16"><a href="#cb4-16" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> <p>Often the filename or directory where the files are stored contains information. The <code>id</code> parameter can be used to add an extra column to the result with the full path to each file. (in this case we name the column <code>path</code>).</p> <div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="fu">vroom</span>(files, <span class="at">id =</span> <span class="st">&quot;path&quot;</span>)</span> <span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 13</span></span> <span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb5-8"><a href="#cb5-8" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb5-9"><a href="#cb5-9" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb5-10"><a href="#cb5-10" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 13</span></span> <span id="cb5-11"><a href="#cb5-11" tabindex="-1"></a><span class="co">#&gt; path model mpg cyl disp hp drat wt qsec vs am gear 
carb</span></span> <span id="cb5-12"><a href="#cb5-12" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb5-13"><a href="#cb5-13" tabindex="-1"></a><span class="co">#&gt; 1 /priv… Dats… 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb5-14"><a href="#cb5-14" tabindex="-1"></a><span class="co">#&gt; 2 /priv… Merc… 24.4 4 147. 62 3.69 3.19 20 1 0 4 2</span></span> <span id="cb5-15"><a href="#cb5-15" tabindex="-1"></a><span class="co">#&gt; 3 /priv… Merc… 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2</span></span> <span id="cb5-16"><a href="#cb5-16" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> </div> <div id="reading-compressed-files" class="section level2"> <h2>Reading compressed files</h2> <p>vroom supports reading zip, gz, bz2 and xz compressed files automatically, just pass the filename of the compressed file to vroom.</p> <div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>file <span class="ot">&lt;-</span> <span class="fu">vroom_example</span>(<span class="st">&quot;mtcars.csv.gz&quot;</span>)</span> <span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a></span> <span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a><span class="fu">vroom</span>(file)</span> <span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 12</span></span> <span id="cb6-5"><a href="#cb6-5" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb6-6"><a href="#cb6-6" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb6-7"><a href="#cb6-7" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span 
id="cb6-8"><a href="#cb6-8" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb6-9"><a href="#cb6-9" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb6-10"><a href="#cb6-10" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb6-11"><a href="#cb6-11" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb6-12"><a href="#cb6-12" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 12</span></span> <span id="cb6-13"><a href="#cb6-13" tabindex="-1"></a><span class="co">#&gt; model mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb6-14"><a href="#cb6-14" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb6-15"><a href="#cb6-15" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4</span></span> <span id="cb6-16"><a href="#cb6-16" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 W… 21 6 160 110 3.9 2.88 17.0 0 1 4 4</span></span> <span id="cb6-17"><a href="#cb6-17" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb6-18"><a href="#cb6-18" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> <p><code>vroom()</code> decompresses, indexes and writes the decompressed data to a file in the temp directory in a single stream. 
The temporary file is used to lazily look up the values and will be automatically cleaned up when all values in the object have been fully read, the object is removed, or the R session ends.</p> <div id="reading-individual-files-from-multiple-multi-file-zip-archives" class="section level3"> <h3>Reading individual files from multiple multi-file zip archives</h3> <p>If you are reading a zip file that contains multiple files with the same format, you can read a subset of the files at once like so:</p> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>zip_file <span class="ot">&lt;-</span> <span class="fu">vroom_example</span>(<span class="st">&quot;mtcars-multi-cyl.zip&quot;</span>)</span> <span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a>filenames <span class="ot">&lt;-</span> <span class="fu">unzip</span>(zip_file, <span class="at">list =</span> <span class="cn">TRUE</span>)<span class="sc">$</span>Name</span> <span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a>filenames</span> <span id="cb7-4"><a href="#cb7-4" tabindex="-1"></a><span class="co">#&gt; [1] &quot;mtcars-4.csv&quot; &quot;mtcars-6.csv&quot; &quot;mtcars-8.csv&quot;</span></span> <span id="cb7-5"><a href="#cb7-5" tabindex="-1"></a></span> <span id="cb7-6"><a href="#cb7-6" tabindex="-1"></a><span class="co"># imagine we only want to read 2 of the 3 files</span></span> <span id="cb7-7"><a href="#cb7-7" tabindex="-1"></a><span class="fu">vroom</span>(purrr<span class="sc">::</span><span class="fu">map</span>(filenames[<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>)], <span class="sc">~</span> <span class="fu">unz</span>(zip_file, .x)))</span> <span id="cb7-8"><a href="#cb7-8" tabindex="-1"></a><span class="co">#&gt; Rows: 25 Columns: 12</span></span> <span id="cb7-9"><a href="#cb7-9" tabindex="-1"></a><span class="co">#&gt; ── Column specification 
────────────────────────────────────────────────────────</span></span> <span id="cb7-10"><a href="#cb7-10" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb7-11"><a href="#cb7-11" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb7-12"><a href="#cb7-12" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb7-13"><a href="#cb7-13" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb7-14"><a href="#cb7-14" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb7-15"><a href="#cb7-15" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb7-16"><a href="#cb7-16" tabindex="-1"></a><span class="co">#&gt; # A tibble: 25 × 12</span></span> <span id="cb7-17"><a href="#cb7-17" tabindex="-1"></a><span class="co">#&gt; model mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb7-18"><a href="#cb7-18" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb7-19"><a href="#cb7-19" tabindex="-1"></a><span class="co">#&gt; 1 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb7-20"><a href="#cb7-20" tabindex="-1"></a><span class="co">#&gt; 2 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4 2</span></span> <span id="cb7-21"><a href="#cb7-21" tabindex="-1"></a><span class="co">#&gt; 3 Merc 230 22.8 4 141. 
95 3.92 3.15 22.9 1 0 4 2</span></span> <span id="cb7-22"><a href="#cb7-22" tabindex="-1"></a><span class="co">#&gt; # ℹ 22 more rows</span></span></code></pre></div> </div> </div> <div id="reading-remote-files" class="section level2"> <h2>Reading remote files</h2> <p>vroom can read files directly from the internet as well by passing the URL of the file to vroom.</p> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>file <span class="ot">&lt;-</span> <span class="st">&quot;https://raw.githubusercontent.com/tidyverse/vroom/main/inst/extdata/mtcars.csv&quot;</span></span> <span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a><span class="fu">vroom</span>(file)</span></code></pre></div> <p>It can even read gzipped files from the internet (although not the other compressed formats).</p> <div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>file <span class="ot">&lt;-</span> <span class="st">&quot;https://raw.githubusercontent.com/tidyverse/vroom/main/inst/extdata/mtcars.csv.gz&quot;</span></span> <span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a><span class="fu">vroom</span>(file)</span></code></pre></div> </div> <div id="column-selection" class="section level2"> <h2>Column selection</h2> <p>vroom provides the same interface for column selection and renaming as <a href="https://dplyr.tidyverse.org/reference/select.html">dplyr::select()</a>. This provides very efficient, flexible and readable selections. 
For example you can select by:</p> <ul> <li>A character vector of column names</li> </ul> <div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a>file <span class="ot">&lt;-</span> <span class="fu">vroom_example</span>(<span class="st">&quot;mtcars.csv.gz&quot;</span>)</span> <span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a></span> <span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a><span class="fu">vroom</span>(file, <span class="at">col_select =</span> <span class="fu">c</span>(model, cyl, gear))</span> <span id="cb10-4"><a href="#cb10-4" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 3</span></span> <span id="cb10-5"><a href="#cb10-5" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb10-6"><a href="#cb10-6" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb10-7"><a href="#cb10-7" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb10-8"><a href="#cb10-8" tabindex="-1"></a><span class="co">#&gt; dbl (2): cyl, gear</span></span> <span id="cb10-9"><a href="#cb10-9" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb10-10"><a href="#cb10-10" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb10-11"><a href="#cb10-11" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb10-12"><a href="#cb10-12" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 3</span></span> <span id="cb10-13"><a href="#cb10-13" tabindex="-1"></a><span class="co">#&gt; model cyl gear</span></span> <span id="cb10-14"><a href="#cb10-14" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> 
<span id="cb10-15"><a href="#cb10-15" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 6 4</span></span> <span id="cb10-16"><a href="#cb10-16" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 Wag 6 4</span></span> <span id="cb10-17"><a href="#cb10-17" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 4 4</span></span> <span id="cb10-18"><a href="#cb10-18" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> <ul> <li>A numeric vector of column indexes, e.g. <code>c(1, 2, 5)</code></li> </ul> <div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="fu">vroom</span>(file, <span class="at">col_select =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>, <span class="dv">11</span>))</span> <span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 3</span></span> <span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="co">#&gt; dbl (2): cyl, gear</span></span> <span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb11-10"><a href="#cb11-10" tabindex="-1"></a><span class="co">#&gt; # 
A tibble: 32 × 3</span></span> <span id="cb11-11"><a href="#cb11-11" tabindex="-1"></a><span class="co">#&gt; model cyl gear</span></span> <span id="cb11-12"><a href="#cb11-12" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb11-13"><a href="#cb11-13" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 6 4</span></span> <span id="cb11-14"><a href="#cb11-14" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 Wag 6 4</span></span> <span id="cb11-15"><a href="#cb11-15" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 4 4</span></span> <span id="cb11-16"><a href="#cb11-16" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> <ul> <li>Using the selection helpers such as <code>starts_with()</code> and <code>ends_with()</code></li> </ul> <div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="fu">vroom</span>(file, <span class="at">col_select =</span> <span class="fu">starts_with</span>(<span class="st">&quot;d&quot;</span>))</span> <span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 2</span></span> <span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb12-4"><a href="#cb12-4" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb12-5"><a href="#cb12-5" tabindex="-1"></a><span class="co">#&gt; dbl (2): disp, drat</span></span> <span id="cb12-6"><a href="#cb12-6" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb12-7"><a href="#cb12-7" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb12-8"><a href="#cb12-8" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the 
column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb12-9"><a href="#cb12-9" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 2</span></span> <span id="cb12-10"><a href="#cb12-10" tabindex="-1"></a><span class="co">#&gt; disp drat</span></span> <span id="cb12-11"><a href="#cb12-11" tabindex="-1"></a><span class="co">#&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb12-12"><a href="#cb12-12" tabindex="-1"></a><span class="co">#&gt; 1 160 3.9 </span></span> <span id="cb12-13"><a href="#cb12-13" tabindex="-1"></a><span class="co">#&gt; 2 160 3.9 </span></span> <span id="cb12-14"><a href="#cb12-14" tabindex="-1"></a><span class="co">#&gt; 3 108 3.85</span></span> <span id="cb12-15"><a href="#cb12-15" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> <ul> <li>You can also rename columns</li> </ul> <div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="fu">vroom</span>(file, <span class="at">col_select =</span> <span class="fu">c</span>(<span class="at">car =</span> model, <span class="fu">everything</span>()))</span> <span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a><span class="co">#&gt; Rows: 32 Columns: 12</span></span> <span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a><span class="co">#&gt; Delimiter: &quot;,&quot;</span></span> <span id="cb13-5"><a href="#cb13-5" tabindex="-1"></a><span class="co">#&gt; chr (1): model</span></span> <span id="cb13-6"><a href="#cb13-6" tabindex="-1"></a><span class="co">#&gt; dbl (11): mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb</span></span> <span id="cb13-7"><a href="#cb13-7" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb13-8"><a 
href="#cb13-8" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb13-9"><a href="#cb13-9" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb13-10"><a href="#cb13-10" tabindex="-1"></a><span class="co">#&gt; # A tibble: 32 × 12</span></span> <span id="cb13-11"><a href="#cb13-11" tabindex="-1"></a><span class="co">#&gt; car mpg cyl disp hp drat wt qsec vs am gear carb</span></span> <span id="cb13-12"><a href="#cb13-12" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span> <span id="cb13-13"><a href="#cb13-13" tabindex="-1"></a><span class="co">#&gt; 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4 4</span></span> <span id="cb13-14"><a href="#cb13-14" tabindex="-1"></a><span class="co">#&gt; 2 Mazda RX4 W… 21 6 160 110 3.9 2.88 17.0 0 1 4 4</span></span> <span id="cb13-15"><a href="#cb13-15" tabindex="-1"></a><span class="co">#&gt; 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1</span></span> <span id="cb13-16"><a href="#cb13-16" tabindex="-1"></a><span class="co">#&gt; # ℹ 29 more rows</span></span></code></pre></div> </div> <div id="reading-fixed-width-files" class="section level2"> <h2>Reading fixed width files</h2> <p>A fixed width file can be a very compact representation of numeric data. Unfortunately, it’s also often painful to read because you need to describe the length of every field. vroom aims to make it as easy as possible by providing a number of different ways to describe the field structure. 
Use <code>vroom_fwf()</code> in conjunction with one of the following helper functions to read the file.</p> <div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>fwf_sample <span class="ot">&lt;-</span> <span class="fu">vroom_example</span>(<span class="st">&quot;fwf-sample.txt&quot;</span>)</span> <span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a><span class="fu">cat</span>(<span class="fu">readLines</span>(fwf_sample))</span> <span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a><span class="co">#&gt; John Smith WA 418-Y11-4111 Mary Hartford CA 319-Z19-4341 Evan Nolan IL 219-532-c301</span></span></code></pre></div> <ul> <li><code>fwf_empty()</code> - Guess based on the position of empty columns.</li> </ul> <div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a><span class="fu">vroom_fwf</span>(fwf_sample, <span class="fu">fwf_empty</span>(fwf_sample, <span class="at">col_names =</span> <span class="fu">c</span>(<span class="st">&quot;first&quot;</span>, <span class="st">&quot;last&quot;</span>, <span class="st">&quot;state&quot;</span>, <span class="st">&quot;ssn&quot;</span>)))</span> <span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a><span class="co">#&gt; Rows: 3 Columns: 4</span></span> <span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb15-4"><a href="#cb15-4" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb15-5"><a href="#cb15-5" tabindex="-1"></a><span class="co">#&gt; chr (4): first, last, state, ssn</span></span> <span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb15-7"><a href="#cb15-7" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column 
specification for this data.</span></span> <span id="cb15-8"><a href="#cb15-8" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb15-9"><a href="#cb15-9" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 4</span></span> <span id="cb15-10"><a href="#cb15-10" tabindex="-1"></a><span class="co">#&gt; first last state ssn </span></span> <span id="cb15-11"><a href="#cb15-11" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; </span></span> <span id="cb15-12"><a href="#cb15-12" tabindex="-1"></a><span class="co">#&gt; 1 John Smith WA 418-Y11-4111</span></span> <span id="cb15-13"><a href="#cb15-13" tabindex="-1"></a><span class="co">#&gt; 2 Mary Hartford CA 319-Z19-4341</span></span> <span id="cb15-14"><a href="#cb15-14" tabindex="-1"></a><span class="co">#&gt; 3 Evan Nolan IL 219-532-c301</span></span></code></pre></div> <ul> <li><code>fwf_widths()</code> - Use a user-provided set of field widths.</li> </ul> <div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a><span class="fu">vroom_fwf</span>(fwf_sample, <span class="fu">fwf_widths</span>(<span class="fu">c</span>(<span class="dv">20</span>, <span class="dv">10</span>, <span class="dv">12</span>), <span class="fu">c</span>(<span class="st">&quot;name&quot;</span>, <span class="st">&quot;state&quot;</span>, <span class="st">&quot;ssn&quot;</span>)))</span> <span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a><span class="co">#&gt; Rows: 3 Columns: 3</span></span> <span id="cb16-3"><a href="#cb16-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb16-4"><a href="#cb16-4" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb16-5"><a href="#cb16-5" tabindex="-1"></a><span 
class="co">#&gt; chr (3): name, state, ssn</span></span> <span id="cb16-6"><a href="#cb16-6" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb16-7"><a href="#cb16-7" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb16-8"><a href="#cb16-8" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb16-9"><a href="#cb16-9" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 3</span></span> <span id="cb16-10"><a href="#cb16-10" tabindex="-1"></a><span class="co">#&gt; name state ssn </span></span> <span id="cb16-11"><a href="#cb16-11" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; </span></span> <span id="cb16-12"><a href="#cb16-12" tabindex="-1"></a><span class="co">#&gt; 1 John Smith WA 418-Y11-4111</span></span> <span id="cb16-13"><a href="#cb16-13" tabindex="-1"></a><span class="co">#&gt; 2 Mary Hartford CA 319-Z19-4341</span></span> <span id="cb16-14"><a href="#cb16-14" tabindex="-1"></a><span class="co">#&gt; 3 Evan Nolan IL 219-532-c301</span></span></code></pre></div> <ul> <li><code>fwf_positions()</code> - Use user-provided sets of start and end positions.</li> </ul> <div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="fu">vroom_fwf</span>(fwf_sample, <span class="fu">fwf_positions</span>(<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">30</span>), <span class="fu">c</span>(<span class="dv">20</span>, <span class="dv">42</span>), <span class="fu">c</span>(<span class="st">&quot;name&quot;</span>, <span class="st">&quot;ssn&quot;</span>)))</span> <span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a><span class="co">#&gt; Rows: 3 Columns: 2</span></span> <span id="cb17-3"><a href="#cb17-3" 
tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb17-4"><a href="#cb17-4" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb17-5"><a href="#cb17-5" tabindex="-1"></a><span class="co">#&gt; chr (2): name, ssn</span></span> <span id="cb17-6"><a href="#cb17-6" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb17-7"><a href="#cb17-7" tabindex="-1"></a><span class="co">#&gt; ℹ Use `spec()` to retrieve the full column specification for this data.</span></span> <span id="cb17-8"><a href="#cb17-8" tabindex="-1"></a><span class="co">#&gt; ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</span></span> <span id="cb17-9"><a href="#cb17-9" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 2</span></span> <span id="cb17-10"><a href="#cb17-10" tabindex="-1"></a><span class="co">#&gt; name ssn </span></span> <span id="cb17-11"><a href="#cb17-11" tabindex="-1"></a><span class="co">#&gt; &lt;chr&gt; &lt;chr&gt; </span></span> <span id="cb17-12"><a href="#cb17-12" tabindex="-1"></a><span class="co">#&gt; 1 John Smith 418-Y11-4111</span></span> <span id="cb17-13"><a href="#cb17-13" tabindex="-1"></a><span class="co">#&gt; 2 Mary Hartford 319-Z19-4341</span></span> <span id="cb17-14"><a href="#cb17-14" tabindex="-1"></a><span class="co">#&gt; 3 Evan Nolan 219-532-c301</span></span></code></pre></div> <ul> <li><code>fwf_cols()</code> - Use user-provided named widths.</li> </ul> <div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a><span class="fu">vroom_fwf</span>(fwf_sample, <span class="fu">fwf_cols</span>(<span class="at">name =</span> <span class="dv">20</span>, <span class="at">state =</span> <span class="dv">10</span>, <span class="at">ssn =</span> <span class="dv">12</span>))</span> <span id="cb18-2"><a href="#cb18-2" 
tabindex="-1"></a><span class="co">#&gt; Rows: 3 Columns: 3</span></span> <span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a><span class="co">#&gt; ── Column specification ────────────────────────────────────────────────────────</span></span> <span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a><span class="co">#&gt; </span></span> <span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a><span class="co">#&gt; chr (3): name, state, ssn</span>