From 6c1919c69023bf7e5b59dd5187ac5817fda4433a Mon Sep 17 00:00:00 2001 From: Vitalii Date: Wed, 26 Apr 2017 22:55:57 +0300 Subject: [PATCH 1/2] Update due to pep8 --- DecisionTree/src/dt.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/DecisionTree/src/dt.py b/DecisionTree/src/dt.py index 16ba14f..06b909b 100644 --- a/DecisionTree/src/dt.py +++ b/DecisionTree/src/dt.py @@ -3,31 +3,31 @@ def calcShannonEnt(dataSet): - numEntries=len(dataSet) + numEntries = len(dataSet) - labelCounts={} + labelCounts = {} for featVec in dataSet: - currentLabel=featVec[-1] + currentLabel = featVec[-1] if currentLabel not in labelCounts.keys(): - labelCounts[currentLabel]=0 - labelCounts[currentLabel]+=1 - shannonEnt=0.0 + labelCounts[currentLabel] = 0 + labelCounts[currentLabel] += 1 + shannonEnt = 0.0 for key in labelCounts: - prob =float(labelCounts[key])/numEntries - shannonEnt-=prob*math.log(prob,2) + prob = float(labelCounts[key])/numEntries + shannonEnt -= prob*math.log(prob,2) return shannonEnt def createDataSet(): - dataSet=[[1,0,'man'],[1,1,'man'],[0,1,'man'],[0,0,'women']] - labels=['throat','mustache'] - return dataSet,labels + dataSet = [[1, 0, 'man'], [1, 1, 'man'], [0, 1, 'man'], [0, 0, 'women']] + labels = ['throat', 'mustache'] + return dataSet, labels def splitDataSet(dataSet, axis, value): retDataSet = [] @@ -58,10 +58,6 @@ def chooseBestFeatureToSplit(dataSet): bestInfoGain = infoGain #if better than current best, set to best bestFeature = i return bestFeature #returns an integer - - - - def majorityCnt(classList): classCount={} @@ -99,20 +95,21 @@ def classify(inputTree,featLabels,testVec): valueOfFeat = secondDict[key] if isinstance(valueOfFeat, dict): classLabel = classify(valueOfFeat, featLabels, testVec) - else: classLabel = valueOfFeat + else: + classLabel = valueOfFeat return classLabel def getResult(): - dataSet,labels=createDataSet() + dataSet, labels = createDataSet() # splitDataSet(dataSet,1,1) chooseBestFeatureToSplit(dataSet) # print chooseBestFeatureToSplit(dataSet) #print calcShannonEnt(dataSet) - mtree=createTree(dataSet,labels) + mtree = createTree(dataSet, labels) print mtree - print classify(mtree,['throat','mustache'],[0,0]) + print classify(mtree, ['throat', 'mustache'], [0, 0]) -if __name__=='__main__': +if __name__ == '__main__': getResult() From 8b5dddd907190ce437660e90d7e016c137d75e54 Mon Sep 17 00:00:00 2001 From: Vitalii Date: Fri, 28 Apr 2017 21:40:18 +0300 Subject: [PATCH 2/2] Update AutoNorm.py Due to Pep8 --- AutoNormal/AutoNorm.py | 86 +++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/AutoNormal/AutoNorm.py b/AutoNormal/AutoNorm.py index 4a4d767..4a1df1e 100644 --- a/AutoNormal/AutoNorm.py +++ b/AutoNormal/AutoNorm.py @@ -1,84 +1,84 @@ - from __future__ import division + + def GetAverage(mat): - - n=len(mat) - m= width(mat) + n = len(mat) + m = width(mat) num = [0]*m - for j in range(0,m): + for j in range(m): for i in mat: - num[j]=num[j]+i[j] - num[j]=num[j]/n + num[j] = num[j] + i[j] + num[j] = num[j]/n return num def width(lst): - i=0 + i = 0 for j in lst[0]: - i=i+1 + i = i + 1 return i def GetVar(average,mat): - ListMat=[] + ListMat = [] for i in mat: ListMat.append(list(map(lambda x: x[0]-x[1], zip(average, i)))) - n=len(ListMat) - m= width(ListMat) + n = len(ListMat) + m = width(ListMat) num = [0]*m - for j in range(0,m): + for j in range(m): for i in ListMat: - num[j]=num[j]+(i[j]*i[j]) - num[j]=num[j]/n + num[j] = num[j] + (i[j] * i[j]) + num[j] = num[j]/n return num def DenoisMat(mat): - average=GetAverage(mat) - variance=GetVar(average,mat) - section=list(map(lambda x: x[0]+x[1], zip(average, variance))) + average = GetAverage(mat) + variance = GetVar(average, mat) + section = list(map(lambda x: x[0]+x[1], zip(average, variance))) - n=len(mat) - m= width(mat) + n = len(mat) + m = width(mat) num = [0]*m - denoisMat=[] + denoisMat = [] for i in mat: - for j in range(0,m): - if i[j]>section[j]: - i[j]=section[j] + for j in range(m): + if i[j] > section[j]: + i[j] = section[j] denoisMat.append(i) return denoisMat def AutoNorm(mat): - n=len(mat) - m= width(mat) - MinNum=[9999999999]*m + n = len(mat) + m = width(mat) + MinNum = [9999999999]*m MaxNum = [0]*m for i in mat: - for j in range(0,m): - if i[j]>MaxNum[j]: - MaxNum[j]=i[j] + for j in range(m): + if i[j] > MaxNum[j]: + MaxNum[j] = i[j] for p in mat: - for q in range(0,m): - if p[q]<=MinNum[q]: - MinNum[q]=p[q] + for q in range(m): + if p[q] <= MinNum[q]: + MinNum[q] = p[q] - section=list(map(lambda x: x[0]-x[1], zip(MaxNum, MinNum))) + section = list(map(lambda x: x[0]-x[1], zip(MaxNum, MinNum))) print section - NormMat=[] + NormMat = [] for k in mat: - distance=list(map(lambda x: x[0]-x[1], zip(k, MinNum))) - value=list(map(lambda x: x[0]/x[1], zip(distance,section))) + distance = list(map(lambda x: x[0]-x[1], zip(k, MinNum))) + value = list(map(lambda x: x[0]/x[1], zip(distance,section))) NormMat.append(value) return NormMat -if __name__=='__main__': - mat=[[1,42,512],[4,5,6],[7,8,9],[2,2,2],[2,10,5]] - a=GetAverage(mat) - b=GetVar(a,mat) - print a, +if __name__ == '__main__': + mat = [[1, 42, 512], [4, 5, 6], [7, 8, 9], [2, 2, 2], [2, 10, 5]] + a = GetAverage(mat) + b = GetVar(a,mat) + print a print DenoisMat(mat) # print list(map(lambda x: x[0]-x[1], zip(v2, v1))) - print AutoNorm(mat) \ No newline at end of file + print AutoNorm(mat)