mta-wiki-parser
Version:
Wiki to Discord parser for Multi Theft Auto Wiki: https://wiki.multitheftauto.com/
190 lines (189 loc) • 6.96 kB
JavaScript
;
// Define an `__esModule` property with value `true` on the CommonJS `exports` object.
// NOTE(review): presumably emitted by the TypeScript build for ES-module interop — confirm.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Kinds of paragraph produced by the parser.
 * Reuses `exports.ParsedParagraphType` when it already exists, otherwise
 * creates it, then fills in the string-valued members.
 */
var ParsedParagraphType = exports.ParsedParagraphType || (exports.ParsedParagraphType = {});
Object.assign(ParsedParagraphType, {
    Codeblock: 'Codeblock',
    Text: 'Text',
});
/**
 * Article classifications and their display labels.
 * Reuses `exports.ParsedArticleType` when it already exists, otherwise
 * creates it, then fills in the string-valued members.
 */
var ParsedArticleType = exports.ParsedArticleType || (exports.ParsedArticleType = {});
Object.assign(ParsedArticleType, {
    SharedFunction: 'Shared Function',
    ClientFunction: 'Client Function',
    ServerFunction: 'Server Function',
    ClientEvent: 'Client Event',
    ServerEvent: 'Server Event',
    UsefulFunction: 'Useful Function',
    GenericPage: 'Page',
});
/**
 * Entry point of the module: converts a fetched wiki article into its
 * parsed representation.
 */
class Parser {
    /**
     * @param {object} article - Article carrying `id`, `url`, `title`,
     *   `rawText`, `sections`, `categories` and `image`.
     * @returns {object} Object with `type`, `id`, `url`, `title`, `sections`,
     *   `categories` and `image`; `type` comes from `getArticleType` and
     *   `sections` from `parseSections`, the rest are copied through.
     */
    static parse(article) {
        // Classify first, exactly as before, so a malformed article fails early.
        const type = getArticleType(article);
        const { id, url, title, rawText, sections, categories, image } = article;
        return {
            type,
            id,
            url,
            title,
            sections: parseSections(rawText, sections),
            categories,
            image,
        };
    }
}
// Expose the Parser class as a property of the CommonJS `exports` object.
exports.Parser = Parser;
/**
 * Cuts the raw article text into sections and parses each one.
 *
 * The text before the first heading is not part of the `sections` list, so it
 * is always emitted first under the title "Description". This now also happens
 * when the list is empty or missing; previously such articles produced no
 * sections at all and their text was lost.
 *
 * @param {string} text - Raw wikitext of the whole article.
 * @param {Array<{line: string, byteoffset: number}>} sections - Section
 *   descriptors; `line` becomes the title and `byteoffset` marks where the
 *   section starts inside `text`.
 * @returns {Array<{title: string, paragraphs: Array}>} Parsed sections, lead
 *   text first, then one entry per descriptor in the given order.
 */
function parseSections(text, sections) {
    const headings = Array.isArray(sections) ? sections : [];
    // NOTE(review): offsets are applied with String#substring, i.e. as UTF-16
    // indices, although the field is called `byteoffset` — confirm this is
    // correct for articles containing non-ASCII characters.
    const leadEnd = headings.length > 0 ? headings[0].byteoffset : undefined;
    const parsedSections = [
        {
            title: 'Description',
            paragraphs: parseParagraphs(text.substring(0, leadEnd)),
        },
    ];
    headings.forEach((section, index) => {
        // A section runs until the next heading, or to the end of the text.
        const following = headings[index + 1];
        const endByte = following ? following.byteoffset : undefined;
        parsedSections.push({
            title: section.line,
            paragraphs: parseParagraphs(text.substring(section.byteoffset, endByte)),
        });
    });
    return parsedSections;
}
/**
 * Turns the wikitext of one section into a flat list of paragraphs.
 *
 * The text is first normalised (first line dropped, magic words removed,
 * styling converted, `<section>` tags rewritten to `==Name==`, templates
 * parsed/removed, code blocks fenced). It is then cut at the code-block
 * markers BEFORE headings are looked for, so `==` operators inside a code
 * sample are never mistaken for a heading.
 *
 * @param {string} text - Wikitext of a single section, heading line included.
 * @returns {Array<{title: (string|false), type: string, text: string}>}
 *   Paragraphs in document order. `title` is the most recent `==Heading==`
 *   seen before the paragraph (edge `=` runs and surrounding whitespace
 *   stripped), or `false` when no heading precedes it.
 */
function parseParagraphs(text) {
    const paragraphs = [];
    // Drop the first line of the section.
    let wikitext = text.substring(text.indexOf('\n') + 1);
    wikitext = removeMagicWords(wikitext);
    wikitext = replaceStyling(wikitext);
    // <section name="Name" ...> becomes ==Name==; closing </section> tags are removed.
    wikitext = wikitext.replace(/<section name="([\s\S]*?)"[\s\S]*?>/g, '==$1==').replace(/<\/section>/g, '');
    wikitext = parseTemplates(wikitext);
    wikitext = removeTemplates(wikitext);
    wikitext = parseCodeblock(wikitext);
    // Drop whitespace-only stretches that are followed by two or more line breaks.
    wikitext = wikitext.replace(/^\s*$(?:\r\n?|\n){2,}/gm, '');

    let currentTitle = false;
    const pushParagraph = (body) => {
        paragraphs.push({
            title: currentTitle,
            type: body.startsWith('```') ? ParsedParagraphType.Codeblock : ParsedParagraphType.Text,
            text: body,
        });
    };

    for (const segment of wikitext.split('=.=codeblock=.=')) {
        const trimmedSegment = segment.trim();
        if (trimmedSegment.length === 0) {
            continue;
        }
        if (trimmedSegment.startsWith('```')) {
            // Fenced code: keep verbatim, never scan it for headings.
            pushParagraph(trimmedSegment);
            continue;
        }
        // The capture group makes split() alternate: text, heading, text, ...
        // `={2,}` on both sides keeps ===Sub=== style headings in one piece.
        const pieces = segment.split(/(={2,}.+?={2,})/);
        pieces.forEach((piece, index) => {
            if (index % 2 === 1) {
                // Strip only the surrounding '=' runs so '=' inside a name survives.
                currentTitle = piece.replace(/^=+|=+$/g, '').trim();
                return;
            }
            const body = piece.trim();
            if (body.length > 0) {
                pushParagraph(body);
            }
        });
    }
    return paragraphs;
}
// Templates parse
const oopTemplate_1 = require("./templates/oopTemplate");
const Note_1 = require("./templates/Note");
const Warning_1 = require("./templates/Warning");
const Deprecated_1 = require("./templates/Deprecated");
const NewFeatureItem_1 = require("./templates/NewFeatureItem");
const DoubleBracketLinks_1 = require("./templates/DoubleBracketLinks");
const HtmlComments_1 = require("./templates/HtmlComments");
// Ordered list of template handlers. Each entry takes the text and returns
// the result of the matching module's `parse`; the modules are looked up when
// an entry is called, not when the list is built.
const templateParsers = [
    (wikitext) => HtmlComments_1.parse(wikitext),
    (wikitext) => NewFeatureItem_1.parse(wikitext),
    (wikitext) => oopTemplate_1.parse(wikitext),
    (wikitext) => Note_1.parse(wikitext),
    (wikitext) => Warning_1.parse(wikitext),
    (wikitext) => Deprecated_1.parse(wikitext),
    (wikitext) => DoubleBracketLinks_1.parse(wikitext),
];
/**
 * Runs the text through every entry of `templateParsers`, in list order,
 * feeding each handler the output of the previous one.
 *
 * @param {string} text - Text to process.
 * @returns {string} Output of the last handler (the input itself when the
 *   list is empty).
 */
function parseTemplates(text) {
    return templateParsers.reduce((current, applyParser) => applyParser(current), text);
}
/**
 * Removes every remaining `{{...}}` template from the text, including
 * templates nested inside other templates.
 *
 * A single lazy match stops at the first `}}`, so `{{a {{b}} c}}` used to
 * leave ` c}}` behind. Instead, innermost templates (those containing neither
 * `{{` nor `}}`) are removed repeatedly until nothing changes.
 *
 * @param {string} text - Text possibly containing templates.
 * @returns {string} Text with all balanced templates removed. As before, an
 *   empty `{{}}` is left untouched.
 */
function removeTemplates(text) {
    // Body: one or more characters, none of which starts a "{{" or "}}".
    const innermostTemplate = /\{\{(?:(?!\{\{|\}\})[\s\S])+\}\}/g;
    let current = text;
    let previous;
    do {
        previous = current;
        current = previous.replace(innermostTemplate, '');
    } while (current !== previous);
    return current;
}
/**
 * Converts wiki text styling to its Markdown-style equivalent:
 * `*` to a bullet character, and the quote-run markers for bold/italic to
 * asterisk markers.
 *
 * Complete `<syntaxhighlight ...>...</syntaxhighlight>` elements are passed
 * through unchanged. Previously the replacements also ran over code samples,
 * turning a multiplication `*` into a bullet and `''` (an empty string
 * literal) into `*`.
 *
 * @param {string} text - Wikitext, possibly containing syntaxhighlight elements.
 * @returns {string} Text with prose styling converted and code left verbatim.
 */
function replaceStyling(text) {
    // Bullets are replaced first so the asterisks produced by the
    // bold/italic rules below are not turned into bullets themselves.
    const styleProse = (prose) => prose
        .replace(/\*/g, '\u2022') // * to Unicode Character 'BULLET'
        .replace(/'''''(.*?)'''''/g, '***$1***') // bold and italic
        .replace(/''''(.*?)''''/g, "**'$1'**") // 'bold' (quoted)
        .replace(/'''(.*?)'''/g, '**$1**') // bold
        .replace(/''(.*?)''/g, '*$1*'); // italic
    // The capture group makes split() alternate: prose, code element, prose, ...
    return text
        .split(/(<syntaxhighlight\b[\s\S]*?<\/syntaxhighlight>)/)
        .map((part, index) => (index % 2 === 1 ? part : styleProse(part)))
        .join('');
}
/**
 * Strips double-underscore magic words such as `__NOTOC__` from the text.
 *
 * Only a run of letters enclosed in double underscores is removed. The former
 * pattern `__.*?__` deleted everything between any two double-underscore
 * pairs on a line, which destroyed code such as
 * `{__index = a, __newindex = b}`.
 *
 * @param {string} text - Wikitext.
 * @returns {string} Text without `__WORD__` tokens.
 */
function removeMagicWords(text) {
    return text.replace(/__[A-Za-z]+__/g, '');
}
/**
 * Rewrites `<syntaxhighlight lang="X">code</syntaxhighlight>` elements into
 * fenced code blocks wrapped in `=.=codeblock=.=` markers.
 *
 * Only the value of the `lang` attribute is used for the fence. Other
 * attributes, before or after `lang`, are ignored; previously anything up to
 * the closing `">` ended up in the fence info string, and a `lang` that was
 * not the first attribute was not matched at all.
 *
 * @param {string} text - Wikitext.
 * @returns {string} Text with each matched element replaced by
 *   marker, newline, fenced block, newline, marker.
 */
function parseCodeblock(text) {
    const codeElement = /<syntaxhighlight\b[^>]*?\blang="([^"]*)"[^>]*>([\s\S]+?)<\/syntaxhighlight>/g;
    return text.replace(
        codeElement,
        (match, lang, code) => `=.=codeblock=.=\n\`\`\`${lang}\n${code}\n\`\`\`\n=.=codeblock=.=`,
    );
}
/**
 * Classifies an article from its category names (compared case-insensitively).
 *
 * The result is only as reliable as the wiki's categorisation, so false
 * positives are possible. Precedence: both server and client function
 * categories, then useful function, client function, server function,
 * client event, server event; anything else is a generic page.
 *
 * @param {{categories: Iterable<string>}} article - Article whose categories are inspected.
 * @returns {string} One of the `ParsedArticleType` values.
 */
function getArticleType(article) {
    const names = new Set();
    for (const category of article.categories) {
        names.add(category.toLowerCase());
    }
    const isClientFunction = names.has('client_functions');
    const isServerFunction = names.has('server_functions');

    if (isServerFunction && isClientFunction) {
        return ParsedArticleType.SharedFunction;
    }
    if (names.has('useful_functions')) {
        return ParsedArticleType.UsefulFunction;
    }
    if (isClientFunction) {
        return ParsedArticleType.ClientFunction;
    }
    if (isServerFunction) {
        return ParsedArticleType.ServerFunction;
    }
    if (names.has('client_events')) {
        return ParsedArticleType.ClientEvent;
    }
    if (names.has('server_events')) {
        return ParsedArticleType.ServerEvent;
    }
    return ParsedArticleType.GenericPage;
}