node-apriori
Version:
Apriori frequent itemset mining algorithm implementation in TypeScript / JavaScript.
190 lines (165 loc) • 7.65 kB
text/typescript
import { EventEmitter } from 'events';
/**
 * Occurrence tally for single items.
 * Each key is the JSON-serialized form of an item, each value is how many
 * times that item has been counted.
 */
interface ItemsCount {
    [serializedItem: string]: number;
}
/**
 * Typed event-subscription contract of the Apriori miner.
 * The first overload gives 'data' listeners a typed itemset argument; the
 * second one keeps every other event name usable with an arbitrary listener.
 */
export interface IAprioriEvents<T> {
/** Subscribes to 'data', which is emitted with each frequent itemset as it is mined. */
on(event: 'data', listener: (itemset: Itemset<T>) => void): this;
/** Fallback overload for any other event name. */
on(event: string, listener: Function): this;
}
/**
 * Outcome of one mining run.
 */
export interface IAprioriResults<T> {
    /** Every frequent itemset that was mined, as one flat list. */
    itemsets: Itemset<T>[];
    /** Duration of the run, in whole milliseconds. */
    executionTime: number;
}
/**
 * A group of items together with how often it was observed.
 */
export interface Itemset<T> {
    /** The items that make up this itemset. */
    items: T[];
    /** Absolute occurrence count of the itemset in the transactions (not a ratio). */
    support: number;
}
/**
 * Apriori frequent itemset miner.
 *
 * Apriori is an algorithm for frequent item set mining and association rule
 * learning over transactional databases.
 * It proceeds by identifying the frequent individual items in the given set of transactions
 * and extending them to larger and larger item sets as long as those item sets appear
 * sufficiently often in the database.
 *
 * Items are compared through their JSON serialization, so two structurally equal
 * objects are treated as the same item.
 */
export class Apriori<T> extends EventEmitter implements IAprioriEvents<T> {
    /** Transactions of the current (or most recent) run. */
    private _transactions: T[][] = [];
    /** One set of JSON-serialized items per transaction of the current run. */
    private _transactionKeys: Set<string>[] = [];
    /** Absolute support threshold (number of transactions) of the current run. */
    private _minCount: number = 0;

    /**
     * @param {number} _support Minimum support of itemsets to mine, expressed as a
     *                          fraction of the transactions (0 < _support <= 1).
     */
    constructor( private _support: number /*, private _confidence: number*/ ) {
        super();
    }

    /**
     * Executes the Apriori Algorithm.
     * You can keep track of frequent itemsets as they are mined by listening to the 'data' event on the Apriori object.
     * All mined itemsets, as well as basic execution stats, are returned at the end of the execution through a callback function or a Promise.
     *
     * The relative support given to the constructor is never overwritten, so the
     * same instance can be executed any number of times.
     *
     * @param  {T[][]}                          transactions The transactions from which you want to mine itemsets.
     * @param  {IAprioriResults<T>}             cb           Callback function returning the results.
     * @return {Promise<IAprioriResults<T>>}                 Promise returning the results.
     */
    public exec( transactions: T[][], cb?: (result: IAprioriResults<T>) => any ): Promise<IAprioriResults<T>> {
        this._transactions = transactions;
        // Relative support -> absolute threshold, kept apart from `_support`
        // so that a later run starts again from the relative value.
        this._minCount = Math.ceil(this._support * transactions.length);

        return new Promise<IAprioriResults<T>>( (resolve) => {
            const time = process.hrtime();

            // Serialize every transaction once; all candidate counting reuses these sets.
            this._transactionKeys = transactions.map( (transaction: T[]) => this._toKeySet(transaction) );

            // Generate frequent one-itemsets, then grow level by level until a level is empty.
            let level: Itemset<T>[] = this.getFrequentOneItemsets(transactions);
            const levels: Itemset<T>[][] = [ level ];
            while( level.length > 0 ) {
                level = this.getFrequentKItemsets(level);
                levels.push(level);
            }

            const elapsedTime = process.hrtime(time);

            // Formatting results: flatten the levels, convert [s, ns] to milliseconds.
            const result: IAprioriResults<T> = {
                itemsets: levels.reduce<Itemset<T>[]>( (all: Itemset<T>[], current: Itemset<T>[]) => all.concat(current), []),
                executionTime: Math.round((elapsedTime[0]*1000)+(elapsedTime[1]/1000000))
            };

            if(cb) cb(result);
            resolve(result);
        });
    }

    /**
     * Returns frequent one-itemsets from a given set of transactions.
     * Emits 'data' for every itemset that is kept.
     *
     * @param  {T[][]}          transactions Your set of transactions.
     * @return {Itemset<T>[]}                Frequent one-itemsets.
     */
    private getFrequentOneItemsets( transactions: T[][] ): Itemset<T>[] {
        // This generates one-itemset candidates.
        const count: ItemsCount = this._getDistinctItemsCount(transactions);
        const frequent: Itemset<T>[] = [];

        for( const stringifiedItem of Object.keys(count) ) {
            const support: number = count[stringifiedItem];
            // Pruning one-itemsets below the threshold.
            if( support >= this._minCount ) {
                const frequentItemset: Itemset<T> = {
                    support: support,
                    items: [JSON.parse(stringifiedItem)]
                };
                frequent.push(frequentItemset);
                this.emit('data', frequentItemset);
            }
        }
        return frequent;
    }

    /**
     * Returns frequent (k = n+1)-itemsets from a given array of frequent n-itemsets.
     * Emits 'data' for every itemset that is kept.
     *
     * @param  {Itemset<T>[]} frequentNItemsets Previously determined n-itemsets.
     * @return {Itemset<T>[]}                   Frequent k-itemsets.
     */
    private getFrequentKItemsets( frequentNItemsets: Itemset<T>[] ): Itemset<T>[] {
        // Trivial precondition.
        if(!frequentNItemsets.length) return [];

        // Size of frequent itemsets we want to determine.
        const k: number = frequentNItemsets[0].items.length + 1;

        // Get unique items from these itemsets, keeping first-occurrence order.
        const seen = new Set<string>();
        const items: T[] = [];
        for( const itemset of frequentNItemsets ) {
            for( const item of itemset.items ) {
                const key: string = JSON.stringify(item);
                if( !seen.has(key) ) {
                    seen.add(key);
                    items.push(item);
                }
            }
        }

        // Generating candidates and counting their occurrence.
        return this._getCandidatesCount( this._generateKCandidates(items, k) )
            // Pruning candidates.
            .filter( (itemset: Itemset<T>) => {
                const isFrequent: boolean = itemset.support >= this._minCount;
                if(isFrequent) this.emit('data', itemset);
                return isFrequent;
            });
    }

    /**
     * Returns all combinations (itemset candidates) of size k from a given set of items.
     * Combinations are produced in index order of `items`, each with a support of 0.
     *
     * @param  {T[]}          items The set of items of which you want the combinations.
     * @param  {number}       k     Size of combinations you want.
     * @return {Itemset<T>[]}       Array of itemset candidates.
     */
    private _generateKCandidates( items: T[], k: number): Itemset<T>[] {
        // Trivial preconditions over k.
        if(k > items.length || k <= 0) return [];
        if(k === items.length) return [{items: items, support: 0}];
        if(k === 1) return items.map( (item: T) => {
            return { items: [item], support: 0 };
        });

        const ret: Itemset<T>[] = [];
        // Fix one head item, then combine it with every (k-1)-combination of what follows it.
        for( let i: number = 0; i < items.length - k + 1; i++) {
            const head: T[] = items.slice(i, i + 1);
            this._generateKCandidates(items.slice(i + 1), k - 1)
                .forEach( (tailcomb: Itemset<T>) => ret.push({
                    items: head.concat(tailcomb.items),
                    support: 0
                }));
        }
        return ret;
    }

    /**
     * Populates an Itemset array with their support in the transactions of the current run.
     * A candidate gains one unit of support per transaction that contains all of its items.
     * The candidates are updated in place and the same array is returned.
     *
     * @param  {Itemset<T>[]} candidates The itemset candidates to populate with their support.
     * @return {Itemset<T>[]}            The support-populated collection of itemsets.
     */
    private _getCandidatesCount( candidates: Itemset<T>[] ): Itemset<T>[] {
        candidates.forEach( (candidate: Itemset<T>) => {
            // Serialize the candidate once, then test it against each transaction's key set.
            const keys: string[] = candidate.items.map( (item: T) => JSON.stringify(item) );
            this._transactionKeys.forEach( (transactionKeys: Set<string>) => {
                const includesCandidate: boolean = keys.every( (key: string) => transactionKeys.has(key) );
                if(includesCandidate) candidate.support += 1;
            });
        });
        return candidates;
    }

    /**
     * Returns the occurrence of single items in a given set of transactions.
     * An item is counted once per transaction that contains it, even when it is
     * repeated inside that transaction.
     *
     * @param  {T[][]}      transactions The set of transactions.
     * @return {ItemsCount}              Count of items (stringified items as keys).
     */
    private _getDistinctItemsCount( transactions: T[][] ): ItemsCount {
        const count: ItemsCount = {};
        for( const transaction of transactions ) {
            for( const key of this._toKeySet(transaction) ) {
                count[key] = (count[key] || 0) + 1;
            }
        }
        return count;
    }

    /**
     * Builds the set of JSON-serialized items of one transaction.
     *
     * @param  {T[]}         transaction The transaction to serialize.
     * @return {Set<string>}             Distinct serialized items, in first-occurrence order.
     */
    private _toKeySet( transaction: T[] ): Set<string> {
        const keys = new Set<string>();
        for( const item of transaction ) {
            keys.add(JSON.stringify(item));
        }
        return keys;
    }
}