ts-content-based-recommender
Version:
A TypeScript-based content-based recommender with multilingual support (Japanese & English). Forked from content-based-recommender.
264 lines • 13.5 kB
JavaScript
import { expect } from 'chai';
import ContentBasedRecommender from '../src/lib/ContentBasedRecommender.js';
import sampleDocuments from '../fixtures/sample-documents.js';
import sampleTargetDocuments from '../fixtures/sample-target-documents.js';
import sampleJapaneseDocuments from '../fixtures/sample-japanese-documents.js';
/**
* Test suite for ContentBasedRecommender.
* Tests option validation, document validation, training result validation, and the export/import feature.
*/
describe('ContentBasedRecommender', () => {
/**
 * Option validation: out-of-range option values must be rejected with a
 * descriptive error.
 *
 * Each case only constructs the recommender, mirroring the language-option
 * tests in this file. `train()` is awaited as a promise everywhere else in
 * this file, so (assuming it is an async function) calling it inside a
 * synchronous `expect(fn).to.throw(...)` callback could never contribute to
 * the assertion; it would only leave an un-awaited promise behind if
 * construction ever stopped throwing.
 */
describe('options validation', () => {
  it('should only accept maxVectorSize greater than 0', () => {
    expect(() => {
      new ContentBasedRecommender({
        maxVectorSize: -1,
      });
    }).to.throw('The option maxVectorSize should be integer and greater than 0');
  });

  it('should only accept maxSimilarDocuments greater than 0', () => {
    expect(() => {
      new ContentBasedRecommender({
        maxSimilarDocuments: -1,
      });
    }).to.throw('The option maxSimilarDocuments should be integer and greater than 0');
  });

  it('should only accept minScore between 0 and 1', () => {
    // Below the lower bound.
    expect(() => {
      new ContentBasedRecommender({
        minScore: -1,
      });
    }).to.throw('The option minScore should be a number between 0 and 1');

    // Above the upper bound.
    expect(() => {
      new ContentBasedRecommender({
        minScore: 2,
      });
    }).to.throw('The option minScore should be a number between 0 and 1');
  });
});
/**
 * Document validation: `train()` must reject input that is not an array of
 * objects carrying `id` and `content` fields.
 */
describe('documents validation', () => {
  const recommender = new ContentBasedRecommender();

  /**
   * Runs `recommender.train(documents)` and reports how it failed.
   *
   * The error is captured and returned instead of being asserted on inside
   * the `catch`, so that a "did not throw" failure is not itself caught and
   * re-reported as a confusing message mismatch.
   *
   * @param {*} documents - Value handed to `train()` unchanged.
   * @returns {Promise<*>} The thrown/rejected value, or `undefined` when
   *   training completed without an error.
   */
  const captureTrainError = async (documents) => {
    try {
      await recommender.train(documents);
    } catch (error) {
      return error;
    }
    return undefined;
  };

  it('should only accept array of documents', async () => {
    const error = await captureTrainError({
      1000001: 'Hello World',
      1000002: 'I love programming!',
    });
    expect(error, 'Should have thrown an error').to.be.an.instanceOf(Error);
    expect(error.message).to.equal('Documents should be an array of objects');
  });

  it('should only accept array of documents, with fields id and content', async () => {
    const error = await captureTrainError([
      {
        name: '1000001',
        text: 'Hello World'
      },
      {
        name: '1000002',
        text: 'I love programming!'
      },
    ]);
    expect(error, 'Should have thrown an error').to.be.an.instanceOf(Error);
    expect(error.message).to.equal('Documents should be have fields id and content');
  });
});
/**
 * Single-collection training: checks the ordering of the similar-document
 * list, the start/size arguments of `getSimilarDocuments`, and the
 * `minScore` / `maxSimilarDocuments` options.
 */
describe('training result validation', () => {
  /** Projects a list of similar-document entries onto their ids. */
  const idsOf = (entries) => entries.map((entry) => entry.id);

  it('should return list of similar documents in right order', async () => {
    const engine = new ContentBasedRecommender();
    await engine.train(sampleDocuments);

    const found = idsOf(engine.getSimilarDocuments('1000002'));
    expect(found).to.deep.equal(['1000004', '1000005', '1000009', '1000003', '1000006', '1000001']);
  });

  it('should to be able to control how many similar documents to obtain', async () => {
    const engine = new ContentBasedRecommender();
    await engine.train(sampleDocuments);

    // Start at 0, take 2.
    const firstTwo = idsOf(engine.getSimilarDocuments('1000002', 0, 2));
    expect(firstTwo).to.deep.equal(['1000004', '1000005']);

    // Start at 2, no size given.
    const fromThird = idsOf(engine.getSimilarDocuments('1000002', 2));
    expect(fromThird).to.deep.equal(['1000009', '1000003', '1000006', '1000001']);

    // Start at 1, take 3.
    const middleThree = idsOf(engine.getSimilarDocuments('1000002', 1, 3));
    expect(middleThree).to.deep.equal(['1000005', '1000009', '1000003']);
  });

  it('should to be able to control the minScore of similar documents', async () => {
    const engine = new ContentBasedRecommender({ minScore: 0.4 });
    await engine.train(sampleDocuments);

    for (const source of sampleDocuments) {
      const matches = engine.getSimilarDocuments(source.id);
      for (const match of matches) {
        expect(match.score).to.be.at.least(0.4);
      }
    }
  });

  it('should to be able to control the maximum number of similar documents', async () => {
    const engine = new ContentBasedRecommender({ maxSimilarDocuments: 3 });
    await engine.train(sampleDocuments);

    for (const source of sampleDocuments) {
      const matches = engine.getSimilarDocuments(source.id);
      expect(matches).to.have.length.at.most(3);
    }
  });
});
/**
 * Two-collection training via `trainBidirectional`: checks ordering, the
 * start/size arguments of `getSimilarDocuments`, and the `minScore` /
 * `maxSimilarDocuments` options.
 *
 * The option tests train with `trainBidirectional` and query ids from both
 * collections, so they exercise the multi-collection path instead of
 * repeating the single-collection `train` checks.
 * NOTE(review): assumes `trainBidirectional` applies `minScore` and
 * `maxSimilarDocuments` the same way `train` does — confirm against the
 * implementation.
 */
describe('training multi collection result validation', () => {
  it('should return list of similar documents of the target collection in right order', async () => {
    const recommender = new ContentBasedRecommender();
    await recommender.trainBidirectional(sampleDocuments, sampleTargetDocuments);
    const similarDocuments = recommender.getSimilarDocuments('1000011');
    const ids = similarDocuments.map(document => document.id);
    expect(ids).to.deep.equal(['1000002', '1000004', '1000005', '1000009', '1000003', '1000006', '1000001']);
  });

  it('should to be able to control how many similar documents to obtain using multiple collections', async () => {
    const recommender = new ContentBasedRecommender();
    await recommender.trainBidirectional(sampleDocuments, sampleTargetDocuments);

    // Start at 0, take 2.
    let similarDocuments = recommender.getSimilarDocuments('1000011', 0, 2);
    let ids = similarDocuments.map(document => document.id);
    expect(ids).to.deep.equal(['1000002', '1000004']);

    // Start at 2, no size given.
    similarDocuments = recommender.getSimilarDocuments('1000011', 2);
    ids = similarDocuments.map(document => document.id);
    expect(ids).to.deep.equal(['1000005', '1000009', '1000003', '1000006', '1000001']);

    // Start at 1, take 3.
    similarDocuments = recommender.getSimilarDocuments('1000011', 1, 3);
    ids = similarDocuments.map(document => document.id);
    expect(ids).to.deep.equal(['1000004', '1000005', '1000009']);
  });

  it('should to be able to control the minScore of similar documents', async () => {
    const recommender = new ContentBasedRecommender({ minScore: 0.4 });
    await recommender.trainBidirectional(sampleDocuments, sampleTargetDocuments);
    // Query every id from both collections.
    [...sampleDocuments, ...sampleTargetDocuments].forEach((document) => {
      const similarDocuments = recommender.getSimilarDocuments(document.id);
      similarDocuments.forEach((similarDocument) => {
        expect(similarDocument.score).to.be.at.least(0.4);
      });
    });
  });

  it('should to be able to control the maximum number of similar documents', async () => {
    const recommender = new ContentBasedRecommender({ maxSimilarDocuments: 3 });
    await recommender.trainBidirectional(sampleDocuments, sampleTargetDocuments);
    // Query every id from both collections.
    [...sampleDocuments, ...sampleTargetDocuments].forEach((document) => {
      const similarDocuments = recommender.getSimilarDocuments(document.id);
      expect(similarDocuments).to.have.length.at.most(3);
    });
  });
});
/**
 * Export/import round trip: a recommender rebuilt from exported data must
 * return the same similar documents as the one that was trained.
 */
describe('export and import', () => {
  it('should to be able to give the same results with recommender created by import method', async () => {
    const trained = new ContentBasedRecommender({
      maxSimilarDocuments: 3,
      minScore: 0.4,
    });
    await trained.train(sampleDocuments);
    const snapshot = trained.export();

    // Create a second recommender from the exported data.
    const restored = new ContentBasedRecommender(snapshot.options);
    restored.import(snapshot);

    for (const source of sampleDocuments) {
      const fromTrained = trained.getSimilarDocuments(source.id);
      const fromRestored = restored.getSimilarDocuments(source.id);
      expect(fromTrained).to.deep.equal(fromRestored);
    }
  });
});
/**
 * Japanese language support: validation of the `language` option and
 * training / similarity lookup on Japanese-language documents.
 */
describe('Japanese language support', () => {
  /** Projects a list of similar-document entries onto their ids. */
  const idsOf = (entries) => entries.map((entry) => entry.id);

  it('should accept language option "ja"', () => {
    const build = () => {
      new ContentBasedRecommender({
        language: 'ja',
      });
    };
    expect(build).to.not.throw();
  });

  it('should reject invalid language option', () => {
    const build = () => {
      new ContentBasedRecommender({
        language: 'fr',
      });
    };
    expect(build).to.throw('The option language should be either "en" or "ja"');
  });

  it('should successfully train with Japanese documents', async () => {
    const engine = new ContentBasedRecommender({
      language: 'ja',
      debug: false,
      minScore: 0.0,
    });

    // Train on the Japanese fixture documents.
    await engine.train(sampleJapaneseDocuments);

    // Confirm that similar documents can be retrieved.
    const matches = engine.getSimilarDocuments('jp1000001');
    expect(matches).to.be.an('array');
  }).timeout(10000); // Timeout set to 10 seconds.

  it('should find similarities between Japanese documents with common keywords', async () => {
    const engine = new ContentBasedRecommender({
      language: 'ja',
      minScore: 0.1,
    });

    // Test with more closely related documents.
    const corpus = [
      {
        id: '1',
        content: 'JavaScriptプログラミングは楽しいです',
      },
      {
        id: '2',
        content: 'JavaScript開発の基礎知識を学びます',
      },
      {
        id: '3',
        content: 'プログラミング言語の比較検討',
      },
    ];
    await engine.train(corpus);

    // Confirm that similarity is detected between the documents that share
    // the keyword "プログラミング" (documents 1 and 3).
    const matchesForFirst = engine.getSimilarDocuments('1');
    const matchesForThird = engine.getSimilarDocuments('3');
    expect(matchesForFirst).to.be.an('array');
    expect(matchesForThird).to.be.an('array');

    // Documents 1 and 3 should each list the other as similar.
    const idsForFirst = idsOf(matchesForFirst);
    const idsForThird = idsOf(matchesForThird);
    expect(idsForFirst).to.include('3');
    expect(idsForThird).to.include('1');
  }).timeout(10000);

  it('should process Japanese morphological analysis correctly', async () => {
    const engine = new ContentBasedRecommender({
      language: 'ja',
    });

    // Documents related to machine learning ("機械学習").
    const corpus = [
      {
        id: 'ml1',
        content: '機械学習の基礎概念について説明します',
      },
      {
        id: 'ml2',
        content: 'ディープラーニングは機械学習の一分野です',
      },
      {
        id: 'ml3',
        content: '自然言語処理における機械学習の応用',
      },
    ];
    await engine.train(corpus);

    // Confirm that similarity is computed between documents containing the
    // keyword "機械学習".
    const matches = engine.getSimilarDocuments('ml1');
    expect(matches).to.be.an('array');

    // Expect at least one of the other machine-learning documents to be
    // reported as similar.
    const matchedIds = idsOf(matches);
    expect(matchedIds).to.include.oneOf(['ml2', 'ml3']);
  }).timeout(10000);
});
});
//# sourceMappingURL=ContentBasedRecommender.js.map