Skip to content

Latest commit

 

History

History
291 lines (264 loc) · 6.34 KB

File metadata and controls

291 lines (264 loc) · 6.34 KB

Processing NA

Set key_replace value

// Sets key_replace to 'sample'.
// key_replace is not declared inside this function; presumably it is a shared
// variable that selects the NA-handling mode — verify against its readers.
function key_na_by_sample(){
  key_replace='sample';
}
// Sets key_replace to 'feature'.
// NOTE(review): the function name is spelled "feture"; a correctly spelled
// key_na_by_feature with the same body is defined later in this file.
function key_na_by_feture(){
  key_replace='feature';
}
// Sets key_replace to 'mean'.
// key_replace is not declared inside this function; presumably it is a shared
// variable that selects the NA-handling mode — verify against its readers.
function key_replace_with_mean(){
  key_replace='mean';
}
// Sets key_replace to 'median'.
// key_replace is not declared inside this function; presumably it is a shared
// variable that selects the NA-handling mode — verify against its readers.
function key_replace_with_median(){
  key_replace='median';  
}

Replace NA with 0

/**
 * Replace every missing cell of a matrix with 0.
 *
 * A cell is treated as missing when it is null/undefined or when the global
 * isNaN() reports it as not-a-number (for example NaN or the string 'NA').
 *
 * The replacement is applied to the array returned by transforme_array(data),
 * which is defined outside this block.
 * NOTE(review): other comments in this file describe transforme_array as a
 * matrix transpose; if that is right, the returned matrix is transposed
 * relative to `data` — confirm that callers expect this.
 *
 * @param {Array<Array<*>>} data - matrix to clean.
 * @returns {Array<Array<*>>} the array produced by transforme_array, with
 *   every missing cell set to 0.
 */
function dealwithna_replace_with_0(data)
{
  var matrix = transforme_array(data);

  for (var r = 0; r < matrix.length; r++) {
    var row = matrix[r];
    for (var c = 0; c < row.length; c++) {
      var cell = row[c];
      var missing = cell == null || isNaN(cell);
      if (missing) {
        row[c] = 0;
      }
    }
  }

  return matrix;
}

Replace NA with mean

  • function array_mean_with_na(a)

Calculate the mean of an array, ignoring NA values.

/**
 * Arithmetic mean of the usable values in an array.
 *
 * An element is skipped when it is null/undefined or when the global isNaN()
 * reports it as not-a-number (for example NaN or the string 'NA').
 *
 * @param {Array<*>} a - values of one feature; may mix numbers, numeric
 *   strings and NA markers.
 * @returns {number} the mean of the usable values, or NaN when there are none
 *   (the result of 0 / 0).
 */
function array_mean_with_na(a){
  var count = 0;
  var total = 0;

  for (var i = 0; i < a.length; i++) {
    var value = a[i];
    if (value == null || isNaN(value)) {
      continue;
    }
    // Coerce before adding: a numeric string such as '2' passes the isNaN()
    // check, and `total += '2'` would concatenate instead of summing.
    total += Number(value);
    count += 1;
  }

  return total / count;
}
/**
 * Replace every missing cell with the mean of its feature.
 *
 * Steps:
 *   1. transforme_array(data) turns the sample-major matrix into one array per
 *      feature (the helper is defined outside this block).
 *   2. A JSON round-trip makes a deep copy to write the replacements into.
 *   3. array_mean_with_na() gives the mean of each feature.
 *   4. Cells that are null/undefined or fail the global isNaN() check are
 *      overwritten with that feature's mean.
 *   5. The copy is passed through transforme_array again and returned.
 *
 * NOTE(review): feature_array, feature_array_replace_mean and
 * feature_mean_array are assigned without a declaration, so they become
 * globals in sloppy mode and throw in strict mode / ES modules. They are kept
 * as-is in case other code reads them.
 *
 * @param {Array<Array<*>>} data - matrix, e.g. [[1,2,3],[1,2,3]] with one
 *   inner array per sample.
 * @returns {Array<Array<*>>} matrix with missing cells replaced by feature means.
 */
function dealwithna_replace_with_mean(data)
{
  feature_array = transforme_array(data);
  feature_array_replace_mean = JSON.parse(JSON.stringify(feature_array));

  // One mean per feature, in feature order.
  feature_mean_array = [];
  for (var f = 0; f < feature_array.length; f++) {
    var featureMean = array_mean_with_na(feature_array[f]);
    feature_mean_array.push(featureMean);
  }

  // Fill the gaps of each feature with that feature's mean.
  for (var row = 0; row < feature_array_replace_mean.length; row++) {
    var values = feature_array_replace_mean[row];
    for (var col = 0; col < values.length; col++) {
      var cell = values[col];
      if (cell == null || isNaN(cell)) {
        values[col] = feature_mean_array[row];
      }
    }
  }

  return transforme_array(feature_array_replace_mean);
}

Replace NA with median

  • function array_median_with_na(a)
  • Returns the median of the array, ignoring NA values.
/**
 * Median of the usable values in an array.
 *
 * An element is skipped when it is null/undefined or when the global isNaN()
 * reports it as not-a-number (for example NaN or the string 'NA'). The
 * remaining values are converted to numbers, sorted in ascending order, and
 * the middle value (odd count) or the average of the two middle values (even
 * count) is returned.
 *
 * @param {Array<*>} a - values of one feature; may mix numbers, numeric
 *   strings and NA markers. The array itself is not modified.
 * @returns {number} the median, or NaN when there are no usable values.
 */
function array_median_with_na(a){
  var values = [];

  for (var i = 0; i < a.length; i++) {
    var value = a[i];
    if (value == null || isNaN(value)) {
      continue;
    }
    // Store numbers only: with raw numeric strings the even-length average
    // below would concatenate ('2' + '3' -> '23') instead of adding.
    values.push(Number(value));
  }

  if (values.length === 0) {
    return NaN;
  }

  // Numeric comparator; the default sort compares as strings.
  values.sort(function (x, y) { return x - y; });

  var mid = Math.floor(values.length / 2);
  if (values.length % 2 === 1) {
    return values[mid];
  }
  return (values[mid - 1] + values[mid]) / 2;
}
/**
 * Replace every missing cell with the median of its feature.
 *
 * Steps:
 *   1. transforme_array(data) turns the sample-major matrix into one array per
 *      feature (the helper is defined outside this block).
 *   2. A JSON round-trip makes a deep copy to write the replacements into.
 *   3. array_median_with_na() gives the median of each feature.
 *   4. Cells that are null/undefined or fail the global isNaN() check are
 *      overwritten with that feature's median.
 *   5. The copy is passed through transforme_array again and returned.
 *
 * NOTE(review): feature_array, feature_array_replace_median and
 * feature_median_array are assigned without a declaration, so they become
 * globals in sloppy mode and throw in strict mode / ES modules. They are kept
 * as-is in case other code reads them.
 *
 * @param {Array<Array<*>>} data - matrix, e.g. [[1,2,3],[1,2,3]] with one
 *   inner array per sample.
 * @returns {Array<Array<*>>} matrix with missing cells replaced by feature medians.
 */
function dealwithna_replace_with_median(data)
{
  feature_array = transforme_array(data);
  feature_array_replace_median = JSON.parse(JSON.stringify(feature_array));

  // One median per feature, in feature order.
  feature_median_array = [];
  for (var f = 0; f < feature_array.length; f++) {
    var featureMedian = array_median_with_na(feature_array[f]);
    feature_median_array.push(featureMedian);
  }

  // Fill the gaps of each feature with that feature's median.
  for (var row = 0; row < feature_array_replace_median.length; row++) {
    var values = feature_array_replace_median[row];
    for (var col = 0; col < values.length; col++) {
      var cell = values[col];
      if (cell == null || isNaN(cell)) {
        values[col] = feature_median_array[row];
      }
    }
  }

  return transforme_array(feature_array_replace_median);
}

Remove NA data

Remove feature

/**
 * Drop every feature that contains at least one missing value.
 *
 * The matrix is first passed through transforme_array (defined outside this
 * block; comments elsewhere in this file describe it as a transpose) so that
 * each inner array holds one feature. A feature is kept only when none of its
 * cells is null/undefined or fails the global isNaN() check. The kept features
 * are passed through transforme_array again before being returned.
 *
 * Side effect: the undeclared variable feature_keep is reset and filled with
 * the indices of the features that were kept.
 *
 * NOTE(review): a second function with this same name is defined later in the
 * file and does not update feature_keep; whichever definition is evaluated
 * last is the one callers get.
 *
 * @param {Array<Array<*>>} data - matrix to filter.
 * @returns {Array<Array<*>>} matrix containing only the complete features.
 */
function dealwithna_delete_by_feature(data)
{
  var features = transforme_array(data);
  var kept = [];
  feature_keep = [];

  for (var f = 0; f < features.length; f++) {
    var column = features[f];
    var complete = true;
    for (var s = 0; s < column.length; s++) {
      var cell = column[s];
      if (cell == null || isNaN(cell)) {
        complete = false;
      }
    }
    if (complete) {
      kept.push(column);
      feature_keep.push(f);
    }
  }

  return transforme_array(kept);
}

Remove sample

/**
 * Drop every sample (row) that contains at least one missing value.
 *
 * A cell is treated as missing when it is null/undefined or when the global
 * isNaN() reports it as not-a-number (for example NaN or the string 'NA').
 * Kept rows are the same array objects as in the input, not copies.
 *
 * Side effect: the undeclared variable sample_keep is reset and filled with
 * the indices of the rows that were kept.
 *
 * @param {Array<Array<*>>} data1 - matrix with one inner array per sample.
 * @returns {Array<Array<*>>} new outer array holding only the complete rows.
 */
function dealwithna_delete_by_sample(data1)
{
  var kept = [];
  sample_keep = [];

  for (var r = 0; r < data1.length; r++) {
    var sample = data1[r];
    var complete = true;
    for (var c = 0; c < sample.length; c++) {
      var cell = sample[c];
      if (cell == null || isNaN(cell)) {
        complete = false;
      }
    }
    if (complete) {
      kept.push(sample);
      sample_keep.push(r);
    }
  }

  return kept;
}
// Sets key_replace to 'sample'.
// NOTE(review): this repeats an identical key_na_by_sample definition that
// appears earlier in this file.
function key_na_by_sample(){
  key_replace='sample';
}
// Sets key_replace to 'feature'.
// NOTE(review): an earlier function in this file, spelled key_na_by_feture,
// has the same body.
function key_na_by_feature(){
  key_replace='feature';
}
// Sets key_replace to 'mean'.
// NOTE(review): this repeats an identical key_replace_with_mean definition
// that appears earlier in this file.
function key_replace_with_mean(){
  key_replace='mean';
}
// Sets key_replace to 'median'.
// NOTE(review): this repeats an identical key_replace_with_median definition
// that appears earlier in this file.
function key_replace_with_median(){
  key_replace='median';  
}

Calculate NA value with distance.

/**
 * Drop every feature that contains at least one missing value.
 *
 * The matrix is first passed through transforme_array (defined outside this
 * block; comments elsewhere in this file describe it as a transpose) so that
 * each inner array holds one feature. A feature is kept only when none of its
 * cells is null/undefined or fails the global isNaN() check. The kept features
 * are passed through transforme_array again before being returned.
 *
 * NOTE(review): this redefines dealwithna_delete_by_feature, which is also
 * defined earlier in this file; unlike that version, this one does not record
 * the kept indices in feature_keep.
 *
 * @param {Array<Array<*>>} data - matrix to filter.
 * @returns {Array<Array<*>>} matrix containing only the complete features.
 */
function dealwithna_delete_by_feature(data)
{
  var features = transforme_array(data);
  var kept = [];

  for (var f = 0; f < features.length; f++) {
    var column = features[f];
    var complete = true;
    for (var s = 0; s < column.length; s++) {
      var cell = column[s];
      if (cell == null || isNaN(cell)) {
        complete = false;
      }
    }
    if (complete) {
      kept.push(column);
    }
  }

  return transforme_array(kept);
}
  // Example input: three rows of four values; the last value of the second
  // row is the string 'NA'. Rows appear to be samples — see the matrix layout
  // comments in the replace-with-mean/median functions.
  var a=[[1,2,3,4],[1,2,3,'NA'],[3,4,9,4]];
  // Demo call; the returned matrix is not stored or used here.
  dealwithna_delete_by_feature(a);