// generics.js
// Version:
// A minimal library for deep learning for the web
// 355 lines (330 loc) • 11.1 kB
// JavaScript
const csv = require('csv-parser');
const fs = require('fs');
const Convolution =require("./Convolution.js");
// Module-level scratch object shared by hash_row (which stores per-column lists of
// non-numeric values in it) and Preprocessing.parse_csv (which resets it and resolves with it).
var json={};
/**
 * Tells whether a value is present in a collection.
 * @param {array} neglect_arr : The collection to search (anything exposing `includes`).
 * @param {*} obj : The value to look for.
 * @returns {boolean} true when `neglect_arr.includes(obj)` holds, false otherwise.
 */
function check_neglect(neglect_arr,obj){
  const is_listed = neglect_arr.includes(obj);
  return is_listed ? true : false;
}
/**
 * Shuffles two arrays in place with the same random permutation, walking from the
 * last index of array1 down to index 1 and swapping each slot with a randomly
 * chosen slot at or below it. Elements that shared an index before still share one after.
 * @param {array} array1 : First array; its length drives the loop.
 * @param {array} array2 : Second array, swapped at the same index pairs as array1.
 */
function shuffleArray(array1,array2) {
  let slot = array1.length - 1;
  while (slot > 0) {
    const pick = Math.floor(Math.random() * (slot + 1));

    const held1 = array1[slot];
    array1[slot] = array1[pick];
    array1[pick] = held1;

    const held2 = array2[slot];
    array2[slot] = array2[pick];
    array2[pick] = held2;

    slot -= 1;
  }
}
/**
 * Scales a number down by a fixed factor of 1000.
 * @param {number} numberToReduce : The value to scale.
 * @param {number} limitNumber : Accepted but not used by the current implementation.
 * @returns {number} numberToReduce divided by 1000.
 */
function reduce(numberToReduce,limitNumber) {
  const divisor = 1000;
  const scaled = numberToReduce / divisor;
  return scaled;
}
/**
 * Finds the largest value in a two-dimensional array.
 * The search starts from -1, so -1 is returned when the input is empty or when
 * no element is greater than -1.
 * @param {array} d_arr : Array of rows, each row an array of numbers.
 * @returns {number} The largest element found, or -1.
 */
function get_max(d_arr){
  let largest = -1;
  for (const row of d_arr) {
    const row_peak = Math.max.apply(null, row);
    largest = row_peak > largest ? row_peak : largest;
  }
  return largest;
}
/**
 * Divides every element of a two-dimensional array by `max`, in place.
 * @param {array} arr : Array of rows, each row an array of numbers; modified directly.
 * @param {number} max : The divisor applied to every element.
 */
function divide_by_max(arr,max){
  for (let r = 0; r < arr.length; r += 1) {
    const row = arr[r];
    for (let c = 0; c < row.length; c += 1) {
      row[c] /= max;
    }
  }
}
/**
 * Tells whether a cell is absent (null/undefined) or has zero length (e.g. an empty string).
 */
function is_missing_cell(value){
  return value == null || value.length === 0;
}
/**
 * Replaces every missing cell among `columns` with `make_filler(position)`,
 * where position is the index of the column inside `columns`.
 */
function fill_missing_cells(row,columns,make_filler){
  columns.forEach(function(column,position){
    if(is_missing_cell(row[column])){
      row[column]=make_filler(position);
    }
  });
}
/**
 * Looks up the user supplied replacement for one column, failing with a clear
 * message when the fill description for that axis was not provided at all.
 */
function user_fill_value(fill_with,axis_name,position){
  if(fill_with==null || fill_with[axis_name]==null){
    throw new Error("fill_type 1 needs fill_with[\""+axis_name+"\"] to replace missing values.");
  }
  return fill_with[axis_name][position];
}
/**
 * Converts the cells of `columns` to numbers. Numeric-looking cells become Numbers;
 * any other value is replaced by its index in the per-column list kept in the
 * module-level `json` object (the list is created and extended on demand).
 */
function encode_cells(row,columns){
  columns.forEach(function(column){
    const numeric=Number(row[column]);
    if(!Number.isNaN(numeric)){
      row[column]=numeric;
      return;
    }
    if(json[column]==undefined){
      json[column]=[];
    }
    const categories=json[column];
    let code=categories.indexOf(row[column]);
    if(code===-1){
      //first time this value is seen for the column : register it.
      categories.push(row[column]);
      code=categories.length-1;
    }
    row[column]=code;
  });
}
/**
 * Fills the missing cells of a row and turns every selected cell into a number.
 * The row is modified in place and also returned. Non-numeric values are mapped to
 * indices through lists stored in the module-level `json` object, keyed by column name.
 * @param {object} row : The row, keyed by column name.
 * @param {array} x_axis : The names of the input columns.
 * @param {array} y_axis : The names of the output columns.
 * @param {number} fill_type :
 * If fill_type==0 : Missing cells are left untouched.
 * If fill_type==1 : Missing cells are taken from fill_with["x_axis"][i] / fill_with["y_axis"][i].
 * If fill_type==2 : Missing cells are filled with Math.random()*100.
 * @param {JSON} fill_with : The replacement values, only read when fill_type==1 and a cell is missing.
 * @returns {object} row : The same row object after filling and conversion.
 * @throws {Error} When fill_type is not 0, 1 or 2, or when fill_type==1 must fill a cell
 * but fill_with (or the needed axis entry) is missing.
 */
function hash_row(row,x_axis,y_axis,fill_type,fill_with){
  //loose comparisons are kept on purpose so that "1" and 1 select the same mode.
  if(fill_type==1){
    fill_missing_cells(row,x_axis,function(position){
      return user_fill_value(fill_with,"x_axis",position);
    });
    fill_missing_cells(row,y_axis,function(position){
      return user_fill_value(fill_with,"y_axis",position);
    });
  }else if(fill_type==2){
    const random_value=function(){
      return Math.random()*100;
    };
    fill_missing_cells(row,x_axis,random_value);
    fill_missing_cells(row,y_axis,random_value);
  }else if(fill_type!=0){
    throw new Error("Invalid fill type.");
  }
  //x columns are converted before y columns, as a column listed in both is then already numeric.
  encode_cells(row,x_axis);
  encode_cells(row,y_axis);
  return row;
}
/**
 * Walks a two level directory layout (dir -> class folders -> images), flattens every
 * image through Convolution#image_flatten and appends the results to x_axis / y_axis.
 *
 * Every folder listing is awaited before any image is processed, so the result does
 * not depend on the order in which the find_files promises settle, and an empty class
 * folder no longer prevents the other folders from being processed.
 * @param {array} x_axis : Receives one entry per item yielded by image_flatten.
 * @param {array} y_axis : Receives, for every x_axis entry, a one element array holding the class folder name.
 * @param {string} dir : The root directory; entries are addressed as dir+folder and dir+folder+"/"+image.
 * @param {number} img_length : Forwarded to image_flatten.
 * @param {number} img_height : Forwarded to image_flatten.
 * @param {number} kernel_size : Forwarded to image_flatten.
 * @param {string} conv_options : Forwarded to image_flatten.
 * @param {array} conv_kernel : Forwarded to image_flatten.
 * @param {function} find_files : Called with a directory, returns (a promise of) the array of entry names.
 * @param {function} callback : Optional; called with x_axis.length after every appended sample.
 * @returns {Promise<number>} Resolves with x_axis.length once every image was processed and
 * rejects when a listing or an image fails (image_flatten yielding null rejects with
 * "Error to preprocess the files.").
 */
function image_pre_process(x_axis,y_axis,dir,img_length=500,img_height=500,kernel_size=2,conv_options="direct",conv_kernel=null,find_files,callback){
  const conv=new Convolution();
  //Promise.resolve lets find_files return a plain array, a thenable or a promise.
  return Promise.resolve(find_files(dir))
    .then(function(files){
      return Promise.all(files.map(function(file){
        return Promise.resolve(find_files(dir+file)).then(function(images){
          return images.map(function(class_label){
            return {"dir":dir+file+"/"+class_label,"label":file};
          });
        });
      }));
    })
    .then(function(groups){
      const entries=[].concat(...groups);
      return Promise.all(entries.map(function(entry){
        return conv.image_flatten(entry["dir"],img_length,img_height,kernel_size,conv_options,conv_kernel).then(function(array){
          if(array==null){
            throw new Error("Error to preprocess the files.");
          }
          array.forEach(function(x){
            //x and its label are pushed together so both arrays stay index aligned.
            x_axis.push(x);
            y_axis.push([entry["label"]]);
            if(callback!=null){
              callback(x_axis.length);
            }
          });
        });
      }));
    })
    .then(function(){
      return x_axis.length;
    });
}
/**
 * Lists the first entries of a directory, each prefixed with the directory string.
 * @param {string} dir : The directory to read synchronously; it is concatenated as-is
 * in front of every entry name, so it needs its own trailing separator.
 * @param {number} threashold : How many entries to take from the start of the listing
 * (passed as the delete count of Array#splice).
 * @returns {array} The selected entries as dir+name strings.
 */
function find_filesX(dir,threashold) {
  const listing = fs.readdirSync(dir);
  const selected = listing.splice(0, threashold);
  return selected.map(function (name) {
    return dir + name;
  });
}
/**
 * Flattens one image and appends every resulting item to the bucket of its class.
 *
 * The promise of image_flatten is returned directly (instead of being wrapped in a
 * hand-made promise), so the result also settles when the image yields no items and
 * rejects when flattening fails.
 * NOTE(review): `keys` is read as a free variable and is not declared in the visible
 * part of this file — presumably the list of class names; confirm where it is defined.
 * @param {JSON} x_axis : The buckets, keyed by keys[i].toString(); each bucket must already be an array.
 * @param {string} image_dir : The image location handed to image_flatten.
 * @param {number} i : The index into `keys` selecting the bucket.
 * @param {number} threashold : Not used here.
 * @param {string} dir : Not used here.
 * @param {number} img_length : Forwarded to image_flatten.
 * @param {number} img_height : Forwarded to image_flatten.
 * @param {number} kernel_size : Forwarded to image_flatten.
 * @param {string} conv_options : Forwarded to image_flatten.
 * @param {array} conv_kernel : Forwarded to image_flatten.
 * @returns {Promise<number>} Resolves with i once the items were stored; rejects when
 * image_flatten rejects or yields null ("Error to preprocess the files.").
 */
function get_sampling(x_axis,image_dir,i,threashold,dir,img_length,img_height,kernel_size,conv_options,conv_kernel) {
  const conv = new Convolution();
  return Promise.resolve(conv.image_flatten(image_dir,img_length,img_height,kernel_size,conv_options,conv_kernel)).then(function (array) {
    if (array == null) {
      throw new Error("Error to preprocess the files.");
    }
    array.forEach(function (x) {
      const lbl = keys[i];
      x_axis[lbl.toString()].push(x);
    });
    return i;
  });
}
/**
 * Collects up to `threashold` processed sample images per class into x_axis.
 *
 * An empty bucket is created synchronously for every class, then every selected image
 * is handed to get_sampling. The per-image promises are gathered and returned, so the
 * caller can wait for completion and observe failures instead of them being lost.
 * NOTE(review): `keys` is read as a free variable and is not declared in the visible
 * part of this file — presumably the list of class folder names; confirm where it is defined.
 * @param {JSON} x_axis : Receives one array per class, keyed by keys[i].toString().
 * @param {number} threashold : The number of images taken from each class folder.
 * @param {string} dir : The root directory; class folders are addressed as dir+key+"/".
 * @param {number} img_length : Forwarded to get_sampling.
 * @param {number} img_height : Forwarded to get_sampling.
 * @param {number} kernel_size : Forwarded to get_sampling.
 * @param {string} conv_options : Forwarded to get_sampling.
 * @param {array} conv_kernel : Forwarded to get_sampling.
 * @returns {Promise<JSON>} Resolves with x_axis once every image was processed; rejects with the first failure.
 */
function sub_sampling(x_axis,threashold,dir, img_length, img_height, kernel_size,conv_options,conv_kernel) {
  for (let i = 0; i < keys.length; i++) {
    x_axis[keys[i].toString()] = [];
  }
  const pending = [];
  for (let i = 0; i < keys.length; i++) {
    const images = find_filesX(dir + keys[i] + "/", threashold);
    images.forEach(function (image_dir) {
      pending.push(
        get_sampling(x_axis,image_dir,i,threashold,dir,img_length,img_height,kernel_size,conv_options,conv_kernel).then(function (index) {
          console.info("\n Processed with key : " + keys[index]);
        })
      );
    });
  }
  return Promise.all(pending).then(function () {
    return x_axis;
  });
}
/**
 * Label-encodes the first element of every y_axis entry into the range [0, 1].
 *
 * The distinct labels are collected in order of first appearance; a label at position p
 * out of n distinct labels is encoded as p/(n-1). y_axis is rewritten in place, every
 * entry becoming a one element array holding the encoded value.
 * When only one distinct label exists it is encoded as 0 (p/(n-1) would be 0/0 = NaN),
 * and an empty y_axis yields an empty result instead of failing.
 * @param {array} y_axis : Array of arrays; only element [0] of each entry is read.
 * @returns {JSON} An object with "y_axis" (the same, now encoded, array) and "key"
 * (the distinct labels; the index of a label is its position before scaling).
 * @throws {Error} When the first label is an object (this includes null, whose typeof is "object").
 */
function encoding(y_axis) {
  if (y_axis.length === 0) {
    return { "y_axis": y_axis, "key": [] };
  }
  if (typeof (y_axis[0][0]) == "object") {
    throw new Error("Objects cannot be encoded..");
  }
  //get the unique values in a single pass; the Map gives constant time look-ups.
  const positions = new Map();
  const labels = [];
  y_axis.forEach(function (item) {
    const label = item[0];
    if (!positions.has(label)) {
      positions.set(label, labels.length);
      labels.push(label);
    }
  });
  const span = labels.length - 1;
  for (let i = 0; i < y_axis.length; i++) {
    const position = positions.get(y_axis[i][0]);
    y_axis[i] = [span === 0 ? 0 : position / span];
  }
  return { "y_axis": y_axis, "key": labels };
}
/**
 * A class for all pre processing activities.
 * Every method is a thin entry point over the module-level helpers of this file.
 */
class Preprocessing{
  /**
   * To pre process the images of a directory.
   * @param {array} x_axis : The x_axis will be filled as the images are getting processed.
   * @param {array} y_axis : The y_axis will be filled as the images are getting processed.
   * @param {string} dir : The directory handed to find_files to locate the images.
   * @param {number} img_length : The length of the image.
   * @param {number} img_height : The height of the image.
   * @param {number} kernel_size : The size of the maxpooling kernel.
   * @param {string} conv_options : Refer : https://image-js.github.io/image-js/#imageconvolution
   * @param {array} conv_kernel : The kernel which must be used to process the image.
   * @param {function} find_files : A function, defined by the user, which finds the entries of a directory. Mandatory.
   * @param {function} callback : A callback function which takes a argument number_of_images_processed.
   * @returns Whatever the module-level image_pre_process helper returns.
   * @throws {Error} When find_files is not passed.
   */
  image_pre_process(x_axis,y_axis,dir,img_length=500,img_height=500,kernel_size=2,conv_options="direct",conv_kernel=null,find_files=null,callback=null){
    if(find_files==null){
      throw new Error("The function to find files (find_files) is not passed.Please do add a function with your constraint needed.");
    }
    return image_pre_process(x_axis,y_axis,dir,img_length,img_height,kernel_size,conv_options,conv_kernel,find_files,callback);
  }
  /**
   * To label encode the outputs (delegates to the module-level encoding helper).
   * @param {array} y_axis : The y_axis is the output for which we need the predictions.
   * @returns {Promise} result : Resolves with the value returned by encoding(y_axis), which can be used to classify the data.
   */
  async label_encoding(y_axis){
    return encoding(y_axis);
  }
  /**
   * To get the samples of images for quality check and data visualization or other purposes.
   * @param {JSON} x_axis : A empty JSON which will be filled as the images are getting processed.
   * @param {number} threashold : The threashold is the number of sample images taken from each class.
   * @param {string} dir : The directory which holds the class folders.
   * @param {number} img_length : The length of the image.
   * @param {number} img_height : The height of the image.
   * @param {number} kernel_size : The size of the maxpooling kernel.
   * @param {string} conv_options : Refer : https://image-js.github.io/image-js/#imageconvolution
   * @param {array} conv_kernel : The kernel which must be used to process the image.
   * @returns Whatever the module-level sub_sampling helper returns.
   */
  sub_sampling(x_axis,threashold,dir, img_length, img_height, kernel_size,conv_options,conv_kernel){
    return sub_sampling(x_axis,threashold,dir, img_length, img_height, kernel_size,conv_options,conv_kernel);
  }
  /**
   * To parse the csv and convert it to JSON.
   *
   * Every accepted row is passed through hash_row, split into its x and y values,
   * divided by the maximum value and finally shuffled (x and y together).
   * The module-level `json` object is reset at the start and used as the result, so
   * two overlapping calls would share it; run the calls one after the other.
   * @param {string} dir : The path of the csv file to read.
   * @param {number} fill_type :
   * If fill_type==0 : Reject the row when one of the selected cells is missing.
   * If fill_type==1 : Fill with the user give number which is given in fill_json (the last param)
   * If fill_type==2 : If fill with random values.
   * @param {array} x_axis : The names of the columns used as inputs.
   * @param {array} y_axis : The names of the columns used as outputs.
   * @param {number} maximum_val : The value every x and y value is divided by. When it is
   * not given, the largest value reported by get_max over x and y is used (no division when that is 0).
   * @param {JSON} fill_json : The Array you to fill with (will be applicable if fill_type==1 will be selected.)
   Example
   var fill_json={
   "x_axis":[0,0,0,0,0,0,0,0,0,0,0,0,0,0],
   "y_axis":[0]
   };
   * @returns {Promise<JSON>} Resolves with the module-level `json` object holding "x_axis", "y_axis"
   * and "max_val" (next to whatever hash_row stored in it). Rejects when the file cannot be read,
   * when the rows do not all have the same number of columns or when a row cannot be processed.
   */
  async parse_csv(dir,fill_type,x_axis,y_axis,maximum_val,fill_json=null){
    return new Promise(function(resolve, reject) {
      json={};
      const y=[];
      const x=[];
      let row_length=-1;
      let failed=false;
      const source=fs.createReadStream(dir);
      const parser=csv();
      //single exit for every failure : stop reading and reject exactly once.
      const fail=function(err){
        if(failed){
          return;
        }
        failed=true;
        source.destroy();
        reject(err);
      };
      const is_missing=function(value){
        return value==null || value.length==0;
      };
      //pipe() does not forward errors, so both streams need their own handler.
      source.on('error', fail);
      parser.on('error', fail);
      source.pipe(parser)
        .on('data', (row) => {
          if(failed){
            return;
          }
          //an exception thrown from an event handler would escape the promise, hence the try/catch.
          try{
            const column_count=Object.keys(row).length;
            if(row_length==-1){
              row_length=column_count;
            }
            if(column_count!=row_length){
              throw new Error("The rows length does not match in the csv file.");
            }
            if(fill_type==0){
              //reject the row..
              const has_gap=x_axis.some((name) => is_missing(row[name]))
                || y_axis.some((name) => is_missing(row[name]));
              if(has_gap){
                return;
              }
            }
            row=hash_row(row,x_axis,y_axis,fill_type,fill_json);
            y.push(y_axis.map((name) => row[name]));
            x.push(x_axis.map((name) => row[name]));
          }catch(err){
            fail(err);
          }
        })
        .on('end', () => {
          if(failed){
            return;
          }
          try{
            if(maximum_val==undefined){
              let max=get_max(x);
              const max2=get_max(y);
              if(max<max2){
                max=max2;
              }
              if(max!=0){
                divide_by_max(x,max);
                divide_by_max(y,max);
              }
              maximum_val=max;
            }else{
              divide_by_max(x,maximum_val);
              divide_by_max(y,maximum_val);
            }
            //shuffle the elements..
            shuffleArray(x,y);
            console.log('CSV file successfully processed');
            json["x_axis"]=x;
            json["y_axis"]=y;
            json["max_val"]=maximum_val;
          }catch(err){
            fail(err);
            return;
          }
          resolve(json);
        });
    });
  }
}
module.exports = Preprocessing