article-parser
Version:
To extract main article from given URL
67 lines (53 loc) • 2.08 kB
JavaScript
/**
* Based on string-similarity by aceakash
* Original: https://github.com/aceakash/string-similarity
* Converted to ES6 module syntax to fix an error on bun.js
**/
/**
 * Check that the arguments given to `findBestMatch` are usable.
 *
 * @param {*} mainString - value that must be a string
 * @param {*} targetStrings - value that must be a non-empty array of strings
 * @returns {boolean} true only when `mainString` is a string and
 *   `targetStrings` is a non-empty array in which every element is a string
 */
const areArgsValid = (mainString, targetStrings) => {
  if (typeof mainString !== 'string') return false
  if (!Array.isArray(targetStrings) || targetStrings.length === 0) return false
  // Inspect each element directly instead of testing the result of `find`:
  // `find` returns the offending element itself, so a falsy non-string
  // (0, null, undefined, false, NaN) would be mistaken for "nothing found".
  // `for...of` also yields `undefined` for holes in sparse arrays.
  for (const candidate of targetStrings) {
    if (typeof candidate !== 'string') return false
  }
  return true
}
/**
 * Score how alike two strings are, based on the adjacent character pairs
 * (bigrams) they have in common. All whitespace is removed before comparing.
 *
 * The score is twice the number of shared bigrams divided by the total number
 * of bigrams in both strings. A bigram that occurs k times in `first` can be
 * matched at most k times by `second`.
 *
 * @param {string} first - one string to compare
 * @param {string} second - the other string to compare
 * @returns {number} 1 when the whitespace-stripped strings are equal
 *   (including both empty), 0 when either has fewer than two characters,
 *   otherwise the bigram overlap ratio
 */
const compareTwoStrings = (first, second) => {
  const left = first.replace(/\s+/g, '')
  const right = second.replace(/\s+/g, '')

  // Equal after stripping whitespace; this also covers two empty strings
  if (left === right) return 1
  // A string shorter than two characters contains no bigram at all
  if (left.length < 2 || right.length < 2) return 0

  // How many times each bigram of `left` is still available for matching
  const remaining = new Map()
  for (let pos = 0; pos + 1 < left.length; pos += 1) {
    const pair = left.slice(pos, pos + 2)
    remaining.set(pair, (remaining.get(pair) || 0) + 1)
  }

  let shared = 0
  for (let pos = 0; pos + 1 < right.length; pos += 1) {
    const pair = right.slice(pos, pos + 2)
    const available = remaining.get(pair) || 0
    if (available > 0) {
      // Use up one occurrence so repeated bigrams are not counted twice
      remaining.set(pair, available - 1)
      shared += 1
    }
  }

  // Each string of length n has n - 1 bigrams, hence the "- 2"
  return (2.0 * shared) / (left.length + right.length - 2)
}
/**
 * Rate every string in `targetStrings` against `mainString` and report which
 * one scored highest.
 *
 * @param {string} mainString - the string to compare against
 * @param {string[]} targetStrings - non-empty list of candidate strings
 * @returns {{ratings: {target: string, rating: number}[], bestMatch: {target: string, rating: number}, bestMatchIndex: number}}
 *   `ratings` holds one entry per candidate in input order; `bestMatch` is the
 *   entry with the highest rating and `bestMatchIndex` its position. When
 *   several candidates share the highest rating, the earliest one is kept.
 * @throws {Error} when `areArgsValid` rejects the arguments
 */
export const findBestMatch = (mainString, targetStrings) => {
  if (!areArgsValid(mainString, targetStrings)) {
    throw new Error('Bad arguments: First argument should be a string, second should be an array of strings')
  }

  const ratings = []
  for (const target of targetStrings) {
    const rating = compareTwoStrings(mainString, target)
    ratings.push({ target, rating })
  }

  // Strict ">" means a later candidate replaces the current best only when it
  // scores strictly higher, so ties resolve to the lowest index
  const bestMatchIndex = ratings.reduce(
    (bestSoFar, entry, index) => (entry.rating > ratings[bestSoFar].rating ? index : bestSoFar),
    0
  )

  return { ratings, bestMatch: ratings[bestMatchIndex], bestMatchIndex }
}