"use strict";
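/** Helper emitted by the TypeScript compiler to run async function bodies as generators. */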
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
const _ = require("lodash");
const siafun_1 = require("siafun");
const file_manager_1 = require("../../files/file-manager");
const util_1 = require("../../files/util");
const options_1 = require("../../files/options");
const timeline_analysis_1 = require("../../analysis/timeline-analysis");
const sequence_heuristics_1 = require("../../analysis/sequence-heuristics");
const util_2 = require("../../analysis/util");
const models_1 = require("../../models/models");
const experiment_1 = require("../../files/experiment");
const sequences_1 = require("../../files/sequences");
const util_3 = require("./util");
const leadsheets_1 = require("../../files/leadsheets");
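/**
 * Runs a full parameter sweep over the given songs: for every combination of
 * Smith-Waterman and MSA settings it saves feature and chord sequences, aligns
 * the versions with hmmAlign, and records alignment statistics, ratings, and
 * chord evaluations to statsFile via an Experiment.
 */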
function fullSweep(tlo, songs = util_3.getTunedSongs(), statsFile) {
return __awaiter(this, void 0, void 0, function* () {
const msaConfigs = getSweepConfigs({
//best: median, 1, 0.8/0.8, 0.999/0.01, undefined
modelLength: [models_1.MSA_LENGTH.MEDIAN],
iterations: [1],
edgeInertia: [0.8],
distInertia: [0.8],
matchMatch: [0.999],
deleteInsert: [0.01],
flankProb: [undefined] //, 0.99]//, 0.6, 0.7, 0.8, 0.9, 0.999, 0.999999]
});
const swConfigs = getSweepConfigs({
maxIterations: [1],
//similarityThreshold: .95,
minSegmentLength: [16],
//maxThreshold: [50], //stop when max value below this
nLongest: [10],
maxGapSize: [4],
//maxGaps: 5,
maxGapRatio: [0.25],
minDistance: [4]
});
const sectionConfig = {
numConns: 1,
maskThreshold: .1
};
const ratingFactorNames = sequence_heuristics_1.getFactorNames();
const evalNames = ["originalGround", "originalSeq", "tlModesGround",
"tlModesSeq", "tlGraphGround", "tlGraphSeq", "msaGround", "msaSeq",
"graphGround", "graphSeq"];
const resultNames = ["stateCount", "avgStateP", "probStates", "logP",
"trackP", "rating"].concat(ratingFactorNames).concat(evalNames);
yield util_1.mapSeries(songs.filter(s => !_.includes(['brokedown_palace', 'friend_of_the_devil',
'mountains_of_the_moon', 'west_l.a._fadeaway'], s)), (song) => __awaiter(this, void 0, void 0, function* () {
return util_1.mapSeries(swConfigs, (swConfig) => __awaiter(this, void 0, void 0, function* () {
let [folders, options] = util_3.getSongFoldersAndOptions(tlo, song);
options.audioFiles = yield util_3.getTunedAudioFiles(song, tlo.count);
options = Object.assign(options, { featuresFolder: folders.features, patternsFolder: folders.patterns });
const swOptions = options_1.getSwOptions(folders.patterns, options.featureOptions, swConfig);
const points = yield util_3.getPoints(options.audioFiles, options.featureOptions);
console.log('saving feature sequences');
const pointsFile = options.filebase + "-points.json";
if (options.multinomial)
sequences_1.saveMultinomialSequences(points, pointsFile, true);
else
sequences_1.saveRawSequences(points, pointsFile);
sequences_1.saveChordLabelSequences(points, options.filebase + "-chords.json", true);
const swColumns = _.clone(swOptions);
delete swColumns.selectedFeatures; //these are either logged in song field or irrelevant...
delete swColumns.quantizerFunctions;
delete swColumns.cacheDir;
const songWithExt = options.filebase.split('/').slice(-1)[0];
const configs = msaConfigs.map(c => Object.assign({ song: songWithExt, model: models_1.getModel(c) }, c, swColumns, sectionConfig));
yield new experiment_1.Experiment("msa sweep " + song + " ", configs, (i) => __awaiter(this, void 0, void 0, function* () {
const msaFile = yield models_1.hmmAlign(pointsFile, util_3.getMSAFolder(options), msaConfigs[i]);
const stats = getMSAStats(msaFile);
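//'alignments' is referenced here but never defined in this file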
const rating = yield timeline_analysis_1.getRatingsFromMSAResult(points, msaFile, alignments);
const allSWEvals = yield getAllSWEvals(song, points, options, msaFile, sectionConfig.numConns, sectionConfig.maskThreshold);
console.log(allSWEvals);
return _.zipObject(resultNames, [stats.totalStates, _.mean(stats.statePs), stats.probableStates,
_.mean(stats.logPs), _.mean(stats.trackPs), rating.rating,
...ratingFactorNames.map(f => rating.factors[f]),
...evalNames.map(e => allSWEvals[e])]);
})).run(statsFile);
/*await mapSeries(configs, async c => {
const msaFile = await hmmAlign(points, getMSAFolder(options), c);
await analysis.saveTimelineFromMSAResults(msaFile);
//TODO SAVE STRUCTURE NOW!! AAND::: NO LOADING OF OUTFILE, NEEDS TO BE CALCULATED
analysis.getStructure();
});*/
}));
}));
});
}
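/**
 * Prints aggregate MSA statistics for all tuned songs: overall probable track
 * and state counts, mean track and state probabilities, and the mean and
 * median partition rating.
 */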
function printOverallMSAStats(tlo) {
return __awaiter(this, void 0, void 0, function* () {
const songs = util_3.getTunedSongs();
const filebases = songs.map(s => util_3.getSongFoldersAndOptions(tlo, s)[1].filebase);
const stats = yield util_1.mapSeries(filebases, (f) => __awaiter(this, void 0, void 0, function* () { return getMSAStats(f + "-msa.json"); }));
console.log("tracks", _.sum(stats.map(s => s.probableTracks)), "of", _.sum(stats.map(s => s.totalTracks)));
console.log("states", _.sum(stats.map(s => s.probableStates)), "of", _.sum(stats.map(s => s.totalStates)));
console.log("trackP", _.mean(_.flatten(stats.map(s => s.trackPs))));
console.log("stateP", _.mean(_.flatten(stats.map(s => s.statePs))));
const analyses = yield util_1.mapSeries(songs, (s) => __awaiter(this, void 0, void 0, function* () {
const [folders, options] = util_3.getSongFoldersAndOptions(tlo, s);
options.audioFiles = yield util_3.getTunedAudioFiles(s, options.count);
const points = yield util_3.getPoints(options.audioFiles, options.featureOptions);
return new timeline_analysis_1.TimelineAnalysis(points, Object.assign(options, { featuresFolder: folders.features, patternsFolder: folders.patterns }));
}));
const ratings = yield util_1.mapSeries(analyses, (a) => __awaiter(this, void 0, void 0, function* () { return a.getPartitionRating(); }));
console.log("rating", _.mean(ratings), util_2.getMedian(ratings));
});
}
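/** Prints the probable track and state counts of a single MSA result file, and optionally the full logP/trackP/stateP distributions. */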
function printMSAStats(filepath, full) {
const stats = getMSAStats(filepath);
if (full) {
printStats("logPs:", stats.logPs);
printStats("trackPs:", stats.trackPs);
printStats("statePs:", stats.statePs);
}
console.log("probable tracks:", stats.probableTracks, "of", stats.totalTracks);
console.log("probable states:", stats.probableStates, "of", stats.totalStates);
}
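/** Logs the range, mean, and standard deviation of a list of values. */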
function printStats(name, values) {
console.log(name + ":", "[" + _.min(values) + ", " + _.max(values) + "]", _.mean(values), util_2.getStandardDeviation(values));
}
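/**
 * Loads an MSA result file and derives per-track and per-state statistics:
 * trackPs is the proportion of aligned (non-empty) positions per track,
 * statePs is the average number of occurrences of each match state per track,
 * and tracks/states with a value above 0.5 count as probable.
 */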
function getMSAStats(filepath) {
const json = file_manager_1.loadJsonFile(filepath);
const msa = json["msa"];
const logPs = json["logp"];
const trackPs = msa.map(m => m.filter(s => s != "").length / m.length);
const matchStates = _.sortBy(_.uniq(_.flatten(msa))
.filter(s => s.length > 0), s => parseInt(s.slice(1)));
const statePs = matchStates.map(m => _.sum(msa.map(v => v.filter(s => s === m).length)) / msa.length);
const numProbTracks = trackPs.filter(p => p > 0.5).length;
const numProbStates = statePs.filter(p => p > 0.5).length;
return { totalTracks: msa.length, totalStates: matchStates.length,
logPs: logPs, trackPs: trackPs, statePs: statePs,
probableTracks: numProbTracks, probableStates: numProbStates };
}
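/** Expands a map of parameter names to value lists into the cartesian product of all configurations. */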
function getSweepConfigs(configs) {
const product = util_1.cartesianProduct(_.values(configs));
return product.map(p => _.zipObject(Object.keys(configs), p));
}
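/**
 * Evaluates several chord sequence variants of a song against the ground truth
 * in data/gd_chords: the original per-version chords, the per-version chords
 * with timeline-covered positions replaced by mode or section-graph labels,
 * and the label sequences themselves. Returns the ground-truth and sequence
 * coverage for each variant (averaged over versions where applicable).
 */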
function getAllSWEvals(song, points, options, msaFile, numConns, maskThreshold) {
return __awaiter(this, void 0, void 0, function* () {
//await analysis.saveTimelineFromMSAResults(msaFile);
const tlModeLabels = yield timeline_analysis_1.getTimelineModeLabels(points, msaFile);
const tlGraphLabels = yield timeline_analysis_1.getTimelineSectionModeLabels(points, msaFile, numConns, maskThreshold);
const timeline = (yield timeline_analysis_1.getPartitionFromMSAResult(points, msaFile)).getPartitions();
const chords = file_manager_1.loadJsonFile(options.filebase + '-chords.json');
const adjustedChords = chords.map((cs, i) => cs.map((c, j) => {
const index = timeline.findIndex(t => t.find(n => n.version == i && n.time == j) != null);
return index >= 0 ? tlModeLabels[index] : c;
}));
const adjustedChords2 = chords.map((cs, i) => cs.map((c, j) => {
const index = timeline.findIndex(t => t.find(n => n.version == i && n.time == j) != null);
return index >= 0 ? tlGraphLabels[index] : c;
}));
const original = chords.map(c => getEvaluation(c, "data/gd_chords/" + song + ".json"));
const tlModes = adjustedChords.map(c => getEvaluation(c, "data/gd_chords/" + song + ".json"));
const tlGraph = adjustedChords2.map(c => getEvaluation(c, "data/gd_chords/" + song + ".json"));
const msa = getEvaluation(tlModeLabels, "data/gd_chords/" + song + ".json");
const graph = getEvaluation(tlGraphLabels, "data/gd_chords/" + song + ".json");
return {
originalGround: _.mean(original.map(o => o.groundP)),
originalSeq: _.mean(original.map(o => o.seqP)),
tlModesGround: _.mean(tlModes.map(o => o.groundP)),
tlModesSeq: _.mean(tlModes.map(o => o.seqP)),
tlGraphGround: _.mean(tlGraph.map(o => o.groundP)),
tlGraphSeq: _.mean(tlGraph.map(o => o.seqP)),
msaGround: msa.groundP,
msaSeq: msa.seqP,
graphGround: graph.groundP,
graphSeq: graph.seqP
};
});
}
exports.getAllSWEvals = getAllSWEvals;
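/** Evaluates each song's saved chord label sequences against its ground-truth chords and prints the per-version ground-truth coverage. */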
function evaluateSeparateChords(tlo, songs = util_3.getTunedSongs(), statsFile) {
return util_1.mapSeries(songs, (song) => __awaiter(this, void 0, void 0, function* () {
let [_folders, options] = util_3.getSongFoldersAndOptions(tlo, song);
const chords = file_manager_1.loadJsonFile(options.filebase + '-chords.json');
const evals = chords.map(c => getEvaluation(c, "data/gd_chords/" + song + ".json"));
console.log(JSON.stringify(evals.map(e => e.groundP)));
}));
}
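/**
 * Aligns a flattened result sequence with the ground-truth chord sequence of a
 * leadsheet via simple Smith-Waterman and logs how much of each sequence the
 * alignment path covers.
 */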
function evaluate(outputFile, leadsheetFile) {
const groundtruth = leadsheets_1.getStandardChordSequence(leadsheetFile, true);
const result = _.flattenDeep(file_manager_1.loadJsonFile(outputFile));
console.log(JSON.stringify(groundtruth));
console.log(JSON.stringify(result));
const vocab = _.uniq(_.concat(groundtruth, result));
const numeric = (s) => s.map(v => [vocab.indexOf(v)]);
const path = siafun_1.getSimpleSmithWatermanPath(numeric(groundtruth), numeric(result), {
//fillGaps?: boolean,
//onlyDiagonals: true
});
//console.log(JSON.stringify(path.map(([i,j]) => [groundtruth[i], result[j]])));
console.log(groundtruth.length, result.length, path.length);
console.log(path.length / groundtruth.length, path.length / result.length);
return outputFile;
}
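/** Returns the proportion of the ground truth (groundP) and of the given sequence (seqP) covered by their Smith-Waterman alignment path. */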
function getEvaluation(sequence, leadSheetFile) {
const groundtruth = leadsheets_1.getStandardChordSequence(leadSheetFile, true);
const vocab = _.uniq(_.concat(groundtruth, sequence));
const numeric = (s) => s.map(v => [vocab.indexOf(v)]);
const path = siafun_1.getSimpleSmithWatermanPath(numeric(groundtruth), numeric(sequence), {});
return { groundP: path.length / groundtruth.length, seqP: path.length / sequence.length };
}