enex-dump
Version:
Dump the content of .enex files, preserving attachments, some metadata and optionally converting notes to Markdown.
199 lines (157 loc) • 6.97 kB
JavaScript
/* IMPORT */
const _ = require ( 'lodash' ),
matter = require ( 'gray-matter' ),
{html: beautifyHTML} = require ( 'js-beautify' ),
turndown = require ( './_turndown' ),
Matter = require ( './matter' );
/* CONTENT */
/**
 * Helpers for cleaning up a note's markup into HTML or Markdown, and for
 * reading/writing front-matter metadata on already-dumped content.
 */
const Content = {

  format: {

    /**
     * Strips Evernote-specific markup from a note and returns plain HTML.
     *
     * @param {string} html - Raw note markup.
     * @param {string} [title] - When truthy, prepended as an `<h1>` heading.
     * @param {boolean} [beautify=true] - Whether to run the result through js-beautify.
     * @returns {string} The cleaned HTML, starting and ending with exactly one new line.
     */
    html ( html, title, beautify = true ) {

      html = html.replace ( /<!DOCTYPE(.*?)>/g, '' ) // Remove doctype
                 .replace ( /<\?xml(.*?)>/g, '' ) // Remove the XML declaration
                 // Replace todos with checkboxes. A bare `<en-todo/>` (no `checked` attribute) and
                 // `checked` appearing after other attributes are handled too, otherwise the tag
                 // would be silently dropped by the generic `en-*` removal below.
                 .replace ( /<en-todo(\s[^>]*?)?\/?>/g, ( match, attrs = '' ) => {
                   return /\schecked="true"/.test ( attrs ) ? '<input type="checkbox" checked />' : '<input type="checkbox" />';
                 })
                 .replace ( /<\/?en-(\w+)(.*?)>/g, '' ) // Remove custom evernote tags
                 .replace ( /<div>(\s*)<\/div>/g, '' ) // Remove empty divs
                 .replace ( /(<div>(\s*)<br ?\/>(\s*)<\/div>){2,}/g, '<div><br /></div>' ); // Remove extra line breaks

      if ( title ) {

        // NOTE(review): the title is interpolated as-is; presumably callers pass HTML-safe text — confirm
        html = `<h1>${title}</h1>${html}`;

      }

      if ( beautify ) {

        html = beautifyHTML ( html );

      }

      return html.replace ( /^\s*/, '\n' ) // Ensure it starts with a new line
                 .replace ( /\s*$/, '\n' ); // Ensure it ends with a new line

    },

    /**
     * Converts a note's markup to Markdown via turndown, after cleaning it with `Content.format.html`.
     *
     * @param {string} html - Raw note markup.
     * @param {string} [title] - When truthy, becomes the leading heading.
     * @returns {Promise<string>} Resolves with the Markdown text, starting and ending with one new line.
     */
    async markdown ( html, title ) {

      html = Content.format.html ( html, title, false );

      // `[^>]` keeps each match inside a single tag, so an unchecked box followed by a checked one
      // on the same line isn't swallowed into a single `[x]`
      html = html.replace ( /<input[^>]*?type="checkbox"[^>]*?checked[^>]*?>/g, ' [x] ' ) // Replace checked checkbox
                 .replace ( /<input[^>]*?type="checkbox"[^>]*?>/g, ' [ ] ' ); // Replace unchecked checkbox

      const service = new turndown ({
        bulletListMarker: '-',
        headingStyle: 'atx',
        hr: '---'
      });

      /* ALIGNMENT */ // Preserve `text-align` as an aligned paragraph

      service.addRule ( 'alignment', {
        filter: node => node.nodeName !== 'TABLE' && ( node.getAttribute ( 'style' ) || '' ).includes ( 'text-align:' ),
        replacement: ( str, ele ) => {
          str = str.trim ();
          if ( !str.length ) return '';
          const style = ele.getAttribute ( 'style' );
          // Capture only the keyword: works without a trailing `;` and doesn't run into the next declaration
          const alignment = style.match ( /text-align:\s*([\w-]+)/ );
          if ( !alignment ) return `${str}\n\n`; // No usable value (e.g. `text-align: ;`)
          return `<p align="${alignment[1]}">${str}</p>\n\n`;
        }
      });

      /* CODE */ // Divs whose style contains `-en-codeblock` become inline code or fenced blocks

      service.addRule ( 'code', {
        filter: node => node.nodeName === 'DIV' && ( node.getAttribute ( 'style' ) || '' ).includes ( '-en-codeblock' ),
        replacement: str => {
          str = str.trim ();
          if ( !str.length ) return '';
          str = str.replace ( /<(?:.|\n)*?>/gm, '' ); // Drop any leftover tags
          str = str.includes ( '\n' ) ? `\n\n\`\`\`\n${str}\n\`\`\`\n` : `\`${str}\``;
          return str;
        }
      });

      /* OTHERS */ // Map inline styles of `font`/`span` elements to Markdown or minimal HTML

      service.addRule ( 'others', {
        filter: ['font', 'span'],
        replacement: ( str, ele ) => {

          if ( !_.trim ( str ) ) return '';

          /* STYLE */

          const style = ele.getAttribute ( 'style' );

          let newStyle = '';

          if ( style ) {

            /* FORMATTING */

            if ( style.match ( /text-decoration: underline/ ) ) { // Underline
              str = `<u>${str}</u>`;
            }

            if ( style.match ( /text-decoration: line-through/ ) ) { // Strikethrough
              str = `<s>${str}</s>`;
            }

            if ( style.match ( /font-style: italic/ ) ) { // Italic
              str = `_${str}_`;
            }

            if ( style.match ( /font-weight: bold/ ) ) { // Bold
              str = `**${str}**`;
            }

            /* HEADING */

            if ( str.match ( /^[^#]|>#/ ) ) { // Doesn't contain an heading

              if ( style.match ( /font-size: (48|64|72)px/ ) ) { // H1
                str = `# ${str}`;
              }

              if ( style.match ( /font-size: 36px/ ) ) { // H2
                str = `## ${str}`;
              }

              if ( style.match ( /font-size: 24px/ ) ) { // H3
                str = `### ${str}`;
              }

              if ( style.match ( /font-size: (12|13)px/ ) ) { // Small
                str = `<small>${str}</small>`;
              }

              if ( style.match ( /font-size: (9|10|11)px/ ) ) { // Very Small
                str = `<small><small>${str}</small></small>`;
              }

            }

            /* BACKGROUND COLOR */

            const backgroundColor = style.match ( /background-color: ([^;]+);/ );

            if ( backgroundColor && backgroundColor[1] !== 'rgb(255, 255, 255)' ) {
              newStyle += backgroundColor[0];
            }

          }

          /* COLOR */

          const colorAttr = ele.getAttribute ( 'color' ); // Color

          if ( colorAttr && colorAttr !== '#010101' ) {
            newStyle += `color: ${colorAttr};`
          }

          if ( style ) {

            // `(?:^|[^-])` skips `background-color` & co. while still matching when `color` is the first declaration
            const colorStyle = style.match ( /(?:^|[^-])color: ([^;]+);/ );

            if ( colorStyle && colorStyle[1] !== '#010101' ) {
              newStyle += `color: ${colorStyle[1]};`;
            }

          }

          /* NEW STYLE */

          if ( newStyle ) {
            str = `<span style="${newStyle}">${str}</span>`;
          }

          return str;

        }
      });

      service.keep ([ 'kbd' ]);
      service.keep ([ 'b', 'i', 's', 'u' ]); // 😚

      return service.turndown ( html )
                    .replace ( /\\((-|\*|\+) )/g, '$1' ) // Unescape unordered lists
                    .replace ( /^(-|\*|\+)\s+/gm, '$1 ' ) // Remove extra whitespace from unordered lists
                    .replace ( /^((?:-|\*|\+) .*)\n\n(?=(?:-|\*|\+) )/gm, '$1\n' ) // Remove extra whitespace between unordered lists items
                    .replace ( /^(\d+\.)\s+/gm, '$1 ' ) // Remove extra whitespace from ordered lists
                    .replace ( /\\\[([^\]]*?)\\\] /g, '[$1] ' ) // Unescape square brackets
                    // .replace ( /(\s*\n\s*){4,}/g, '\n\n<br />\n\n' ) // Add line breaks
                    .replace ( /(\s*\n\s*){3,}/g, '\n\n' ) // Remove extra new lines
                    .replace ( /\n\n<br \/>\n\n(-|\*|\+) /g, '\n\n$1 ' ) // Remove line breaks before lists
                    .replace ( /^\s*/, '\n' ) // Ensure it starts with a new line
                    .replace ( /\s*$/, '\n' ); // Ensure it ends with a new line

    }

  },

  metadata: {

    // Options handed to gray-matter: the project's `./matter` module is used as the `yaml` engine
    options: {
      engines: {
        yaml: Matter
      }
    },

    /**
     * Parses `content` with gray-matter and returns its front-matter data.
     *
     * @param {string} content - Content possibly starting with front matter.
     * @returns {Object} The `data` property of the gray-matter result.
     */
    get ( content ) {

      return matter ( content, Content.metadata.options ).data;

    },

    /**
     * Replaces the front matter of `content` with `metadata`.
     *
     * @param {string} content - Content possibly starting with front matter.
     * @param {Object} metadata - New metadata; when empty no front matter is written.
     * @returns {string} The content with the existing front matter removed and, if any, the new one added.
     */
    set ( content, metadata ) {

      content = Content.metadata.remove ( content );

      if ( !_.isEmpty ( metadata ) ) {

        content = matter.stringify ( content, metadata, Content.metadata.options );

      }

      return content;

    },

    /**
     * Parses `content` with gray-matter and returns it without its front matter.
     *
     * @param {string} content - Content possibly starting with front matter.
     * @returns {string} The `content` property of the gray-matter result.
     */
    remove ( content ) {

      return matter ( content, Content.metadata.options ).content;

    }

  }

};
/* EXPORT */
// The `Content` object (its `format` and `metadata` helpers) is this module's only export
module.exports = Content;