-
Use 5. Performance: AUC, information gain (entropy), gain ratio, Gini index
-
- Function transforme_array(c);
/**
 * Record 'sample' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'sample' to `key_replace`, a variable
 * that is not declared inside this function.
 * NOTE(review): presumably read elsewhere to pick the "drop samples
 * containing NA" strategy - confirm against the code that reads it.
 */
function key_na_by_sample() {
    key_replace = 'sample';
}
/**
 * Record 'feature' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'feature' to `key_replace`, a variable
 * that is not declared inside this function.
 * NOTE(review): the function name is spelled "feture"; it is kept as-is
 * because callers may reference this exact name.
 */
function key_na_by_feture() {
    key_replace = 'feature';
}
/**
 * Record 'mean' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'mean' to `key_replace`, a variable
 * that is not declared inside this function.
 */
function key_replace_with_mean() {
    key_replace = 'mean';
}
/**
 * Record 'median' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'median' to `key_replace`, a variable
 * that is not declared inside this function.
 */
function key_replace_with_median() {
    key_replace = 'median';
}

/**
 * Replace every missing cell of a matrix with 0.
 *
 * A cell counts as missing when it is null/undefined or when the global
 * isNaN() reports true for it. The coercing global isNaN is used on purpose
 * so that non-numeric strings such as 'NA' are treated as missing.
 *
 * @param {Array<Array<*>>} data - Array of rows.
 * @returns {Array<Array<*>>} The array returned by transforme_array(data)
 *   with its missing cells overwritten by 0.
 */
function dealwithna_replace_with_0(data) {
    // transforme_array is defined outside this block.
    // NOTE(review): other functions in this file treat transforme_array as a
    // transpose and call it a second time before returning; here the result
    // is returned after a single call - confirm the orientation is intended.
    var data1 = transforme_array(data);
    for (var i = 0; i < data1.length; i++) {
        for (var j = 0; j < data1[i].length; j++) {
            if (data1[i][j] == null || isNaN(data1[i][j])) {
                data1[i][j] = 0;
            }
        }
    }
    return data1;
}
// - function array_mean_with_na(a)
Calculate the mean of an array that may contain NA values.
/**
 * Arithmetic mean of the usable entries of an array.
 *
 * Entries that are null/undefined, or for which the global isNaN() is true
 * (for example the string 'NA'), are skipped.
 *
 * @param {Array<*>} a - Values, possibly containing missing entries.
 * @returns {number} Mean of the usable entries; NaN when there are none
 *   (0 / 0).
 */
function array_mean_with_na(a) {
    var counter = 0;
    var sum = 0;
    for (var i = 0; i < a.length; i++) {
        if (a[i] == null || isNaN(a[i])) {
            continue;
        }
        counter += 1;
        // Number(): a numeric string such as '2' passes the isNaN test, and
        // a bare `sum += a[i]` would then concatenate instead of add.
        sum += Number(a[i]);
    }
    return sum / counter;
}

/**
 * Replace every missing cell with the mean of the array it belongs to after
 * the first transforme_array call.
 *
 * transforme_array is defined outside this block; the original comments in
 * this function describe it as transposing the data matrix, so that each
 * array processed below is one feature (column) - confirm.
 *
 * Side effects: assigns `feature_array`, `feature_array_replace_mean` and
 * `feature_mean_array`, none of which are declared in this function.
 * NOTE(review): these writes are kept because code outside this block may
 * read them; declare them explicitly at file level if that is intended.
 *
 * @param {Array<Array<*>>} data - Array of samples (rows).
 * @returns {Array<Array<*>>} Result of transforme_array applied to the
 *   filled copy.
 */
function dealwithna_replace_with_mean(data) {
    var i;
    var j;
    var cell;

    // 1. Regroup the matrix with transforme_array.
    feature_array = transforme_array(data);
    // Deep copy via JSON. Note that JSON turns NaN/undefined entries into
    // null, which the missing-value test below also catches.
    feature_array_replace_mean = JSON.parse(JSON.stringify(feature_array));

    // 2. Mean of each array, ignoring missing entries.
    feature_mean_array = [];
    for (i = 0; i < feature_array.length; i++) {
        feature_mean_array.push(array_mean_with_na(feature_array[i]));
    }

    // 3. Fill the missing cells of the copy with the matching mean.
    for (i = 0; i < feature_array_replace_mean.length; i++) {
        for (j = 0; j < feature_array_replace_mean[i].length; j++) {
            cell = feature_array_replace_mean[i][j];
            if (cell == null || isNaN(cell)) {
                feature_array_replace_mean[i][j] = feature_mean_array[i];
            }
        }
    }
    return transforme_array(feature_array_replace_mean);
}
// - function array_median_with_na(a)
- Returns the median of an array that may contain NA values.
/**
 * Median of the usable entries of an array.
 *
 * Entries that are null/undefined, or for which the global isNaN() is true
 * (for example the string 'NA'), are skipped.
 *
 * @param {Array<*>} a - Values, possibly containing missing entries.
 * @returns {number} The middle value (odd count) or the average of the two
 *   middle values (even count); NaN when there are no usable entries.
 */
function array_median_with_na(a) {
    var values = [];
    for (var i = 0; i < a.length; i++) {
        if (a[i] == null || isNaN(a[i])) {
            continue;
        }
        // Number(): numeric strings pass the isNaN test; without conversion
        // the even-length average below would concatenate them.
        values.push(Number(a[i]));
    }
    if (values.length === 0) {
        return NaN;
    }
    // Ascending numeric order (the default sort compares as strings).
    values.sort(function (x, y) { return x - y; });
    var mid = Math.floor(values.length / 2);
    if (values.length % 2 === 1) {
        return values[mid];
    }
    return (values[mid - 1] + values[mid]) / 2;
}

/**
 * Replace every missing cell with the median of the array it belongs to
 * after the first transforme_array call.
 *
 * transforme_array is defined outside this block; the original comments in
 * this function describe it as transposing the data matrix, so that each
 * array processed below is one feature (column) - confirm.
 *
 * Side effects: assigns `feature_array`, `feature_array_replace_median` and
 * `feature_median_array`, none of which are declared in this function.
 * NOTE(review): these writes are kept because code outside this block may
 * read them; declare them explicitly at file level if that is intended.
 *
 * @param {Array<Array<*>>} data - Array of samples (rows).
 * @returns {Array<Array<*>>} Result of transforme_array applied to the
 *   filled copy.
 */
function dealwithna_replace_with_median(data) {
    var i;
    var j;
    var cell;

    // 1. Regroup the matrix with transforme_array.
    feature_array = transforme_array(data);
    // Deep copy via JSON. Note that JSON turns NaN/undefined entries into
    // null, which the missing-value test below also catches.
    feature_array_replace_median = JSON.parse(JSON.stringify(feature_array));

    // 2. Median of each array, ignoring missing entries.
    feature_median_array = [];
    for (i = 0; i < feature_array.length; i++) {
        feature_median_array.push(array_median_with_na(feature_array[i]));
    }

    // 3. Fill the missing cells of the copy with the matching median.
    for (i = 0; i < feature_array_replace_median.length; i++) {
        for (j = 0; j < feature_array_replace_median[i].length; j++) {
            cell = feature_array_replace_median[i][j];
            if (cell == null || isNaN(cell)) {
                feature_array_replace_median[i][j] = feature_median_array[i];
            }
        }
    }
    return transforme_array(feature_array_replace_median);
}

/**
 * Drop every array that contains a missing cell, working on the output of
 * transforme_array(data), then pass the survivors through transforme_array
 * again.
 *
 * transforme_array is defined outside this block; presumably it transposes,
 * which would make each dropped array a feature (column) - confirm.
 *
 * Side effect: assigns `feature_keep` (not declared in this function) to the
 * list of indices, in the regrouped matrix, of the arrays that were kept.
 *
 * @param {Array<Array<*>>} data - Array of samples (rows).
 * @returns {Array<Array<*>>} transforme_array applied to the kept arrays.
 */
function dealwithna_delete_by_feature(data) {
    var data1 = transforme_array(data);
    var data2 = [];
    feature_keep = [];
    for (var i = 0; i < data1.length; i++) {
        var has_na = false;
        for (var j = 0; j < data1[i].length; j++) {
            if (data1[i][j] == null || isNaN(data1[i][j])) {
                has_na = true;
                break; // one missing cell is enough to drop the array
            }
        }
        if (!has_na) {
            data2.push(data1[i]);
            feature_keep.push(i);
        }
    }
    data2 = transforme_array(data2);
    return data2;
}

/**
 * Drop every row that contains a missing cell.
 *
 * The kept rows are the same array objects as in the input (no copy is
 * made).
 *
 * Side effect: assigns `sample_keep` (not declared in this function) to the
 * list of indices of the rows that were kept.
 *
 * @param {Array<Array<*>>} data1 - Array of rows.
 * @returns {Array<Array<*>>} The rows without any missing cell, in order.
 */
function dealwithna_delete_by_sample(data1) {
    var data2 = [];
    sample_keep = [];
    for (var i = 0; i < data1.length; i++) {
        var has_na = false;
        for (var j = 0; j < data1[i].length; j++) {
            if (data1[i][j] == null || isNaN(data1[i][j])) {
                has_na = true;
                break; // one missing cell is enough to drop the row
            }
        }
        if (!has_na) {
            data2.push(data1[i]);
            sample_keep.push(i);
        }
    }
    return data2;
}
/**
 * Record 'sample' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'sample' to `key_replace`, a variable
 * that is not declared inside this function.
 * NOTE(review): presumably read elsewhere to pick the "drop samples
 * containing NA" strategy - confirm against the code that reads it.
 */
function key_na_by_sample() {
    key_replace = 'sample';
}
/**
 * Record 'feature' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'feature' to `key_replace`, a variable
 * that is not declared inside this function.
 * NOTE(review): presumably read elsewhere to pick the "drop features
 * containing NA" strategy - confirm against the code that reads it.
 */
function key_na_by_feature() {
    key_replace = 'feature';
}
/**
 * Record 'mean' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'mean' to `key_replace`, a variable
 * that is not declared inside this function.
 */
function key_replace_with_mean() {
    key_replace = 'mean';
}
/**
 * Record 'median' as the current NA-handling choice.
 *
 * Side effect: assigns the string 'median' to `key_replace`, a variable
 * that is not declared inside this function.
 */
function key_replace_with_median() {
    key_replace = 'median';
}

/**
 * Drop every array that contains a missing cell, working on the output of
 * transforme_array(data), then pass the survivors through transforme_array
 * again.
 *
 * A cell counts as missing when it is null/undefined or when the global
 * isNaN() reports true for it (for example the string 'NA').
 *
 * transforme_array is defined outside this block; presumably it transposes,
 * which would make each dropped array a feature (column) - confirm.
 *
 * NOTE(review): this file contains an earlier definition of this function
 * that additionally records the kept indices in `feature_keep`. When the
 * file is loaded as a classic script this later definition is the one in
 * effect, so `feature_keep` is not updated - confirm which is intended.
 *
 * @param {Array<Array<*>>} data - Array of samples (rows).
 * @returns {Array<Array<*>>} transforme_array applied to the kept arrays.
 */
function dealwithna_delete_by_feature(data) {
    var data1 = transforme_array(data);
    var data2 = [];
    for (var i = 0; i < data1.length; i++) {
        var has_na = false;
        for (var j = 0; j < data1[i].length; j++) {
            if (data1[i][j] == null || isNaN(data1[i][j])) {
                has_na = true;
                break; // one missing cell is enough to drop the array
            }
        }
        if (!has_na) {
            data2.push(data1[i]);
        }
    }
    data2 = transforme_array(data2);
    return data2;
}
// Example run: three rows of four values, where the last cell of the second
// row holds the string 'NA'. The return value of the call is not used.
var a = [
    [1, 2, 3, 4],
    [1, 2, 3, 'NA'],
    [3, 4, 9, 4]
];
dealwithna_delete_by_feature(a);