@tiddlygit/tiddlywiki
Version:
a non-linear personal web notebook
479 lines (446 loc) • 14.9 kB
JavaScript
/*\
title: $:/plugins/tiddlywiki/text-slicer/modules/slicer.js
type: application/javascript
module-type: library
Slice a tiddler or DOM document into individual tiddlers
var slicer = new textSlicer.Slicer({
slicerRules: JSON data defining slicer rules -or- title of rules taken from tiddlers tagged $:/tags/text-slicer/slicer-rules
sourceTiddlerTitle: tiddler to slice -or-
sourceText: text to slice
outputMode: "html" (default) -or- "wiki"
baseTiddlerTitle: "MySlicedTiddlers-"
role: "sliced-content"
callback: function(err,tiddlers)
});
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Create a slicer and immediately run it over the source text.

The constructor runs the rule-matching self-test, records the options, resolves
the slicer rules, seeds the chunk list with a top-level "document" chunk, and
then pushes the whole source text through a SAX parser whose event handlers are
methods of this object.

options:
	sourceTiddlerTitle: title of the tiddler to slice (read by getSourceText)
	sourceText: text to slice (used by getSourceText when no sourceTiddlerTitle is given)
	wiki: wiki object used to look up tiddlers and slicer rules
	role: stored in the document chunk's "role" field (defaults to "sliced-html")
	outputMode: stored as "slicer-output-mode" (defaults to "html")
	escapeWikiText: defaults to false
	callback: function(err,tiddlers), stored as this.callbackFn
	slicerRules: falsy -> the rules named "html-by-paragraph" are loaded;
		string -> the rules with that name are loaded;
		anything else -> used directly as the rules
	baseTiddlerTitle: passed through getBaseTiddlerTitle to derive the title prefix
*/
function Slicer(options) {
// Quick tests (run on every construction; testSlicerRuleMatching throws if a check fails)
this.testSlicerRuleMatching();
// Marshal parameters
this.sourceTiddlerTitle = options.sourceTiddlerTitle;
this.sourceText = options.sourceText;
this.wiki = options.wiki;
this.role = options.role || "sliced-html";
this.outputMode = options.outputMode || "html";
this.escapeWikiText = options.escapeWikiText || false;
this.callbackFn = options.callback;
// Get the slicer rules
// nameSlicerRules stays null when the rules were supplied as data rather than by name;
// it is recorded in the document chunk's "slicer-rules" field below
var nameSlicerRules = null;
if(!options.slicerRules) {
nameSlicerRules = "html-by-paragraph";
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else if(typeof options.slicerRules === "string") {
nameSlicerRules = options.slicerRules;
this.slicerRules = this.loadSlicerRules(nameSlicerRules);
} else {
this.slicerRules = options.slicerRules;
}
// Set up the base tiddler title
this.baseTiddlerTitle = this.getBaseTiddlerTitle(options.baseTiddlerTitle);
// Initialise state
this.namespaces = {}; // Hashmap of URLs
this.chunks = []; // Array of tiddlers without titles, addressed by their index. We use the title field to hold the plain text content
this.currentChunk = null; // Index of the chunk currently being written to
this.parentStack = []; // Stack of parent chunks {chunk: chunk index,actions:}
this.elementStack = []; // Stack of {tag:,isSelfClosing:,actions:}
this.titleCounts = {}; // Hashmap of counts of prefixed titles that have been issued
// Set up the document tiddler as top level heading
this.chunks.push({
"toc-type": "document",
title: this.baseTiddlerTitle,
text: "<div class='tc-table-of-contents'><<toc-selective-expandable \"\"\"" + this.baseTiddlerTitle + "document\"\"\">></div>",
list: [],
tags: [],
role: this.role,
"slicer-rules": nameSlicerRules,
"slicer-output-mode": this.outputMode
});
// Chunk 0 is the document chunk pushed above; its actions come from the rule matching the pseudo-element "(document)"
this.parentStack.push({chunk: 0, actions: this.getMatchingSlicerRuleActions("(document)")});
// NOTE(review): insertPrecedingChunk is defined outside this section; presumably it adds an anchor chunk under the current parent - confirm
this.insertPrecedingChunk({
"toc-type": "anchor",
"title": this.baseTiddlerTitle + "-anchor-"
});
// Set up the parser
var sax = require("$:/plugins/tiddlywiki/sax/sax.js");
// NOTE(review): the first argument is presumably the sax "strict" flag (false = lenient parsing) - confirm against the sax plugin
this.sax = sax.parser(false,{
xmlns: true,
lowercase: true
});
// The handlers are bound so that they run with this slicer as their `this`
this.sax.onerror = this.onError.bind(this);
this.sax.onopennamespace = this.onOpenNamespace.bind(this);
this.sax.onclosenamespace = this.onCloseNamespace.bind(this);
this.sax.onopentag = this.onOpenTag.bind(this);
this.sax.onclosetag = this.onCloseTag.bind(this);
this.sax.ontext = this.onText.bind(this);
this.sax.onend = this.onEnd.bind(this);
// Start streaming the data
// The complete source text is handed to the parser in a single write, then the parser is closed
this.sax.write(this.getSourceText());
this.sax.close();
}
/*
Report the outcome of slicing to the caller-supplied callback.
The call is deferred through $tw.utils.nextTick rather than made directly.
	err: error value to pass on
	tiddlers: sliced tiddlers to pass on
*/
Slicer.prototype.callback = function(err,tiddlers) {
	var slicer = this;
	// callbackFn is looked up when the deferred function runs, and is invoked as a method of the slicer
	function deliver() {
		slicer.callbackFn(err,tiddlers);
	}
	$tw.utils.nextTick(deliver);
};
/*
Load a named set of slicer rules, including any rule sets it inherits from.

Rule sets are the tiddlers tagged $:/tags/text-slicer/slicer-rules, keyed by
their "name" field. Starting at `name`, the "inherits-from" field is followed
until it runs out or would revisit a name already seen, and the rules of each
set are concatenated in that order (the requested set first).

	name: name of the rule set to load
Returns an array of rules. A name with no matching rule set contributes no
rules, so an unknown name yields an empty array.
*/
Slicer.prototype.loadSlicerRules = function(name) {
	// Collect the available slicer rule tiddlers.
	// The lookup tables have no prototype so that names such as "constructor" cannot hit inherited properties
	var self = this,
		titles = this.wiki.getTiddlersWithTag("$:/tags/text-slicer/slicer-rules"),
		tiddlersByName = Object.create(null),
		rulesByName = Object.create(null),
		ruleNames = [];
	titles.forEach(function(title) {
		var tiddler = self.wiki.getTiddler(title);
		tiddlersByName[tiddler.fields.name] = tiddler;
		rulesByName[tiddler.fields.name] = self.wiki.getTiddlerData(title,[]);
	});
	// Follow the inheritance trail to get a stack of slicer rule names
	var n = name;
	do {
		ruleNames.push(n);
		n = tiddlersByName[n] && tiddlersByName[n].fields["inherits-from"];
	} while(n && ruleNames.indexOf(n) === -1); // Stop at the end of the chain or when it loops back on itself
	// Concatenate the slicer rules, skipping names that have no rule set
	return ruleNames.reduce(function(accumulator,ruleName) {
		return accumulator.concat(rulesByName[ruleName] || []);
	},[]);
};
/*
Look up the actions of the first slicer rule matching an element, given the
current element stack.
	name: tag name of the element to match
Returns the matching rule's `actions`, or a fresh empty object when no rule matches.
*/
Slicer.prototype.getMatchingSlicerRuleActions = function(name) {
	var matchedRule = this.searchSlicerRules(name,this.slicerRules,this.elementStack);
	return matchedRule ? matchedRule.actions : {};
};
/*
Self-test for searchSlicerRules.

Each case searches a small rule list for an element name with a given element
stack and checks the selector of the rule that was found. Throws an Error
naming the first failing case; a search that finds no rule at all is reported
as a failing case (returning "undefined") rather than crashing.
*/
Slicer.prototype.testSlicerRuleMatching = function() {
	var self = this,
		cases = [
			{
				// A comma separated selector list matches on its first alternative
				name: "title",
				rules: [
					{selector: "title,head,body", rules: true},
					{selector: "body", rules: true}
				],
				elementStack: [
					{tag:"head"}
				],
				result: "title,head,body"
			},
			{
				// The first matching rule wins, even when a later rule also matches
				name: "body",
				rules: [
					{selector: "title,head,body", rules: true},
					{selector: "body", rules: true}
				],
				elementStack: [
					{tag:"head"}
				],
				result: "title,head,body"
			},
			{
				// The child combinator matches against the element's immediate parent
				name: "title",
				rules: [
					{selector: "head > title", rules: true},
					{selector: "title", rules: true}
				],
				elementStack: [
					{tag:"head"}
				],
				result: "head > title"
			}
		];
	cases.forEach(function(testCase,index) {
		var match = self.searchSlicerRules(testCase.name,testCase.rules,testCase.elementStack),
			selector = match ? match.selector : undefined;
		if(selector !== testCase.result) {
			throw new Error("Failing test " + index + ", returns " + selector + " instead of " + testCase.result);
		}
	});
};
/*
Find the first rule whose selector matches an element.

	name: tag name of the element being matched
	rules: array of rules, each with a `selector` string
	elementStack: array of {tag:} objects for the element's ancestors, outermost first

A rule's selector is a comma separated list of alternatives. Each alternative is
a whitespace separated sequence of tag names, optionally separated by ">":
	"a b" matches b with a anywhere above it in the stack
	"a > b" matches b whose immediate parent is a
	"*" on its own matches any element
The last tag name must match the element itself. Matching walks up the stack
greedily without backtracking: an ancestor tag binds to the nearest element
with that tag.

Returns the first matching rule, or undefined if none match. Rules without a
string selector, and empty alternatives, never match. The element stack is not
modified.
*/
Slicer.prototype.searchSlicerRules = function(name,rules,elementStack) {
	// Test a single, already trimmed, alternative against the element and its ancestors
	function selectorMatches(selector) {
		// Split on runs of whitespace so that doubled spaces or tabs do not create empty parts
		var parts = selector.split(/\s+/).filter(function(part) {
			return part !== "";
		});
		if(parts.length === 0) {
			return false;
		}
		// * matches any element
		if(parts.length === 1 && parts[0] === "*") {
			return true;
		}
		// concat() gives us a private copy of the stack (with the element on top) that we can pop destructively
		var elements = elementStack.concat({tag: name}),
			mustBeAtTopOfStack = true; // The rightmost part has to match the element itself
		for(var index = parts.length - 1; index >= 0; index--) {
			var part = parts[index];
			if(part === ">") {
				mustBeAtTopOfStack = true;
				continue;
			}
			if(!mustBeAtTopOfStack) {
				// Descendant match: discard elements until the nearest one with the wanted tag
				while(elements.length > 0 && elements[elements.length - 1].tag !== part) {
					elements.pop();
				}
			}
			if(elements.length === 0 || elements[elements.length - 1].tag !== part) {
				return false;
			}
			elements.pop();
			mustBeAtTopOfStack = false;
		}
		return true;
	}
	return rules.find(function(rule) {
		// Skip malformed rules rather than throwing on them
		if(!rule || typeof rule.selector !== "string") {
			return false;
		}
		return rule.selector.split(",").some(function(selector) {
			return selectorMatches(selector.trim());
		});
	});
};
/*
Work out the base title used for the tiddlers produced by this slicer.
	baseTiddlerTitle: explicitly requested base title, if any
Returns the requested title when one is given; otherwise a title derived from
the source tiddler title; otherwise the fixed title "SlicedTiddler".
*/
Slicer.prototype.getBaseTiddlerTitle = function(baseTiddlerTitle) {
	if(baseTiddlerTitle) {
		return baseTiddlerTitle;
	}
	return this.sourceTiddlerTitle ? "Sliced up " + this.sourceTiddlerTitle + ":" : "SlicedTiddler";
};
/*
Get the text that is to be sliced.

Without a source tiddler title the sourceText option is returned as is. With
one, the tiddler is looked up in the wiki: a missing tiddler is logged to the
console and yields an empty string; a tiddler whose type is text/html, text/xml
or ends in "+xml" yields its raw text; any other tiddler is converted with
getTiddlerAsHtml.
*/
Slicer.prototype.getSourceText = function() {
	if(!this.sourceTiddlerTitle) {
		return this.sourceText;
	}
	var source = this.wiki.getTiddler(this.sourceTiddlerTitle);
	if(!source) {
		console.log("Tiddler '" + this.sourceTiddlerTitle + "' does not exist");
		return "";
	}
	// Markup types are passed to the parser untouched; everything else is converted first
	var type = source.fields.type,
		isMarkup = type === "text/html" || type === "text/xml" || (type || "").slice(-4) === "+xml";
	return isMarkup ? source.fields.text : this.getTiddlerAsHtml(source);
};
Slicer.prototype.getTiddlerAsHtml = function(tiddler) {
var widgetNode = this.wiki.makeTranscludeWidget(tiddler.fields.title,{
document: $tw.fakeDocument,
parseAsInline: false,
importPageMacros: true}),
container = $tw.fakeDocument.createElement("div");
widgetNode.render(container,null);
return ["