/*
 * @dxzmpk/js-algorithms-data-structures
 * Version: (not specified)
 * Algorithms and data structures implemented in JavaScript.
 * 133 lines (112 loc) • 5.05 kB
 * JavaScript
 */
// Separator placed between the word and the text when zAlgorithm() joins them
// into the single string whose Z-array is computed. Its length is subtracted
// again when Z-array indices are converted back into positions in the text.
// NOTE(review): presumably chosen as a character not expected to occur in
// either input — confirm against callers.
const SEPARATOR = '$';
/**
 * Computes the Z-array of a string.
 *
 * Cell `i` (for `i >= 1`) holds the length of the longest run of characters
 * starting at `i` that equals the beginning of the string. Cell 0 is left at 0.
 *
 * Example:
 *   string : a b $ x x a b x x
 *   Z-array: 0 0 0 0 0 2 0 0 0
 *
 * @param {string} zString - The string to analyse.
 * @return {number[]} Z-array with the same length as `zString`.
 */
function buildZArray(zString) {
  const total = zString.length;
  const zArray = new Array(total).fill(0);

  // The "Z box" is the most recently recorded stretch of the string that is
  // known to equal a prefix. Both bounds are inclusive.
  let boxStart = 0;
  let boxEnd = 0;

  // Re-anchors the box at `start`, compares characters against the prefix
  // beginning at `scanFrom`, stores the resulting inclusive box bounds and
  // returns the number of characters at `start` that match the prefix.
  const rebuildBox = (start, scanFrom) => {
    let probe = scanFrom;
    while (probe < total && zString[probe - start] === zString[probe]) {
      probe += 1;
    }
    boxStart = start;
    // `probe` stopped one past the last matching character.
    boxEnd = probe - 1;
    return probe - start;
  };

  for (let index = 1; index < total; index += 1) {
    if (index > boxEnd) {
      // Beyond the current box: nothing is known yet, compare from scratch.
      zArray[index] = rebuildBox(index, index);
    } else {
      // Inside the box: the character at `index` mirrors the prefix character
      // at `index - boxStart`, whose Z-value has already been computed.
      const mirrored = zArray[index - boxStart];
      const remaining = boxEnd - index + 1;

      // A mirrored value that ends strictly inside the box can be reused as
      // is. Otherwise the match reaches the box edge and may continue past
      // it, so keep comparing from the edge onwards.
      zArray[index] = mirrored < remaining
        ? mirrored
        : rebuildBox(index, boxEnd);
    }
  }

  return zArray;
}
/**
 * Finds every position at which `word` occurs in `text` using the Z-algorithm.
 *
 * The word, a separator and the text are joined into one string whose Z-array
 * is computed. Every index inside the text part whose Z-value is at least
 * `word.length` marks the start of an occurrence of the word.
 *
 * Overlapping occurrences are all reported. An empty `word` yields every
 * index of `text`.
 *
 * @param {string} text - The text to search in.
 * @param {string} word - The word to search for.
 * @return {number[]} Zero-based start positions of `word` in `text`, ascending.
 */
export default function zAlgorithm(text, word) {
  // The word may occur several times, so positions are collected in a list.
  const wordPositions = [];

  // Make the word a prefix of the combined string.
  const zString = `${word}${SEPARATOR}${text}`;
  const zArray = buildZArray(zString);

  // Index of the first text character inside zString.
  const textOffset = word.length + SEPARATOR.length;

  // Only indices belonging to the text can be valid match positions. Starting
  // earlier could report negative positions when the word itself contains the
  // separator character.
  for (let charIndex = textOffset; charIndex < zArray.length; charIndex += 1) {
    // Use ">=" rather than "===": when the text contains the separator right
    // after an occurrence, the prefix match continues past the word and the
    // Z-value exceeds the word length. It is still a valid occurrence.
    if (zArray[charIndex] >= word.length) {
      wordPositions.push(charIndex - textOffset);
    }
  }

  return wordPositions;
}