UNPKG

hexo-related-posts

Version:

Hexo plugin that generates related posts list with TF/IDF algorithm

127 lines (107 loc) 6.07 kB
// chalk is loaded through a dynamic import(), so its colour helpers only become
// available once that promise settles. Until then (or if loading fails) use
// pass-through functions so the logging calls below never invoke `undefined`.
const passThrough = (text) => text;
let magenta = passThrough;
let blue = passThrough;
import('chalk')
  .then(({ default: chalk }) => {
    ({ magenta, blue } = chalk);
  })
  .catch(() => {
    // Colours are purely cosmetic: keep the pass-through fallbacks rather than
    // leaving an unhandled promise rejection behind.
  });

const prepareReservedWords = require('./text/prepareReservedWords');
const generateWordVectors = require('./tfidf/generateWordVectors');
const calculateTfs = require('./tfidf/calculateTfs');
const calculateIdfs = require('./tfidf/calculateIdfs');
const calculateTfIdfs = require('./tfidf/calculateTfIdfs');
const matrixHasValue = require('./tfidf/matrixHasValue');
const fillMatrix = require('./tfidf/fillMatrix');
const vectorCosCompare = require('./comparers/vectorCosCompare');
const textCosCompare = require('./comparers/textCosCompare');
const arrayCompare = require('./comparers/arrayCompare');
const cleanupUnrelatedResults = require('./tfidf/cleanupUnrelatedResults');
const sortResultsByRelevance = require('./tfidf/sortResultsByRelevance');
const resolveStemmer = require('./tfidf/resolveStemmer');

// Weight used for each signal when the configuration does not provide one.
const DEFAULT_WEIGHTS = Object.freeze({
  title: 0.05,
  author: 0,
  description: 0.05,
  keywords: 0.01,
  tags: 0.005,
  categories: 0.005,
  text: 1,
});

// Multiplier applied to the TF/IDF cosine similarity before it is weighted.
// NOTE(review): the reason for the value 50 is not visible in this file;
// presumably it balances the text score against the other signals - confirm.
const TFIDF_SCALE = 50;

/**
 * Builds the effective weight table from the user configuration.
 *
 * Only `null`/`undefined` entries fall back to the default, so an explicit
 * weight of `0` is honoured and disables the corresponding comparison.
 *
 * @param {Object} [configured] - The `weight` section of the plugin config.
 * @returns {Object} One weight per key of `DEFAULT_WEIGHTS`.
 */
function resolveWeights(configured) {
  const source = configured ?? {};
  return Object.fromEntries(
    Object.entries(DEFAULT_WEIGHTS).map(([field, fallback]) => [field, source[field] ?? fallback]),
  );
}

/**
 * Creates the function that calculates related posts for a Hexo instance.
 *
 * When invoked, the returned function invalidates `hexo.locals`, reads its
 * settings from `hexo.config.related_posts` (`filter_threshold`,
 * `related_count`, `weight`, `stemmers`, `reserved`), scores every pair of
 * distinct posts and stores the filtered, sorted result in
 * `hexo.related_posts`.
 *
 * @param {Object} hexo - Hexo instance; `log`, `locals` and `config` are used.
 * @returns {Function} Zero-argument function performing the calculation.
 */
const calcRelatedPosts = (hexo) => function () {
  const { log } = hexo;

  hexo.locals.invalidate();

  const config = hexo.config.related_posts;
  const threshold = config.filter_threshold;
  const maxPostCount = config.related_count;
  // Resolved once per run: the weights do not change between post pairs.
  const weights = resolveWeights(config.weight);
  const reservedWords = prepareReservedWords(config.reserved);
  const stemmers = config.stemmers.map(resolveStemmer);

  const posts = hexo.locals.get('posts').data.map(
    ({ title, author, keywords, description, raw, tags, categories, path }) => ({
      title,
      author,
      keywords,
      description,
      raw,
      path,
      tags: tags.data.map((tag) => tag.name),
      categories: categories.data.map((category) => category.name),
    }),
  );

  // Lookup by path; if several posts share a path the last one wins.
  const postsByPath = new Map(posts.map((post) => [post.path, post]));

  log.info('Calculating of related posts is enabled. Start processing...');

  const wordVectors = generateWordVectors(posts, reservedWords, stemmers); // document → word → count
  const tfs = calculateTfs(wordVectors); // document → word → tf
  const idfs = calculateIdfs(wordVectors); // word → idf
  const tfidfs = calculateTfIdfs(tfs, idfs); // document → word → tfidf

  log.info('TF/IDF is calculated');

  const postsMatrix = {};

  for (const { path: path1 } of posts) {
    for (const { path: path2 } of posts) {
      // Skip self-comparison and pairs that already have a value in the matrix.
      if (path1 === path2 || matrixHasValue(postsMatrix, path1, path2)) {
        continue;
      }

      const first = postsByPath.get(path1);
      const second = postsByPath.get(path2);

      // A comparison is only computed when its weight is greater than 0.
      const titleCompare = weights.title > 0
        ? textCosCompare(first.title, second.title, reservedWords, stemmers)
        : 0;
      const authorCompare = weights.author > 0
        ? textCosCompare(first.author, second.author, reservedWords, stemmers)
        : 0;
      const descriptionCompare = weights.description > 0
        ? textCosCompare(first.description, second.description, reservedWords, stemmers)
        : 0;
      const keywordsCompare = weights.keywords > 0
        ? textCosCompare(first.keywords, second.keywords, reservedWords, stemmers)
        : 0;
      const tagsCompare = weights.tags > 0
        ? arrayCompare(first.tags, second.tags)
        : 0;
      const categoriesCompare = weights.categories > 0
        ? arrayCompare(first.categories, second.categories)
        : 0;
      const tfIdfCompare = weights.text > 0
        ? vectorCosCompare(tfidfs[path1], tfidfs[path2]) * TFIDF_SCALE
        : 0;

      log.debug(
        'Comparison between %s and %s details: '
          + 'title: %s, author: %s, description: %s, '
          + 'keywords: %s, tags: %s, categories: %s, tfIdf: %s',
        magenta(path1),
        magenta(path2),
        blue(titleCompare),
        blue(authorCompare),
        blue(descriptionCompare),
        blue(keywordsCompare),
        blue(tagsCompare),
        blue(categoriesCompare),
        blue(tfIdfCompare),
      );

      // Weighted sum of all signals.
      const value = titleCompare * weights.title
        + authorCompare * weights.author
        + descriptionCompare * weights.description
        + keywordsCompare * weights.keywords
        + tagsCompare * weights.tags
        + categoriesCompare * weights.categories
        + tfIdfCompare * weights.text;

      log.debug(
        'Comparison between %s and %s is done with result: %s',
        magenta(path1),
        magenta(path2),
        blue(value),
      );

      fillMatrix(postsMatrix, path1, path2, value);
    }
  }

  hexo.related_posts = sortResultsByRelevance(
    cleanupUnrelatedResults(postsMatrix, threshold),
    maxPostCount,
  ) || {};

  log.info('Related post processing done');
};

module.exports = calcRelatedPosts;