UNPKG

rdd.js

Version:

A node.js lib to process files/data like spark rdd.

39 lines (30 loc) 1.4 kB
const RDD = require('..'); const os = require('os'); (async function () { const rdd = RDD.fromTextFile(`${__dirname}/iterable.js`); const cntRdd = rdd.filter(l => l) .map(l => ({ lineWordCount: l.split(/\s+/).length, isLongStatement: l.split(/\s+/).length > 10 })); console.log(`==========${__dirname}/iterable.js==========`); await rdd.filter(l => l).forEach(console.log); console.log(`==========count rdd origin content==========`); await cntRdd.forEach(console.log); rdd.saveAsTextFile(`${os.tmpdir()}/foo.txt`); await cntRdd.saveAsJsonFile(`${os.tmpdir()}/foo.json`); console.log('==========foo.json=========='); await RDD.fromJsonFile(`${os.tmpdir()}/foo.json`).forEach(console.log); await cntRdd.saveAsCsvFile(`${os.tmpdir()}/foo.csv`); console.log(`==========foo.csv==========`); const csvRdd = RDD.fromCsvFile(`${os.tmpdir()}/foo.csv`); await csvRdd.forEach(console.log); await csvRdd.saveAsJsonFile(`${os.tmpdir()}/test.json`); console.log('=========test.json========='); await RDD.fromJsonFile(`${os.tmpdir()}/test.json`).forEach(console.log); console.log('==========specify header=========='); await csvRdd.saveAsCsvFile(`${os.tmpdir()}/test.csv`, { header: false }); await RDD .fromCsvFile(`${os.tmpdir()}/test.csv`, { columns: ['wcnt', 'lf'] }) .forEach(console.log); })().catch(e => console.log(e.stack));