From d2b4683b9bebd1a2c6b02fade2a0ba441371e852 Mon Sep 17 00:00:00 2001 From: Tarek Allam Date: Tue, 19 May 2020 13:24:03 +0100 Subject: [PATCH 1/3] Updating Python code from python2 to python3 This has been completed using the tool: 2to3 Note, "Structured_APIs-Chapter_6_Working_with_Different_Types_of_Data.py" could not be updated due to parsing error: Error: RefactoringTool: There was 1 error: RefactoringTool: Can't parse Structured_APIs-Chapter_6_Working_with_Different_Types_of_Data.py: ParseError: bad input: type=1, value='as', context=(' ', (334, 69)) Thus, the command run was: $ 2to3 -w `ls *.py | grep -v *Chapter_6*.py` See PR #58 for stdout of command for detailed changes modified: A_Gentle_Introduction_to_Spark-Chapter_3_A_Tour_of_Sparks_Toolset.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_24_Advanced_Analytics_and_Machine_Learning.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_26_Classification.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_27_Regression.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_29_Unsupervised_Learning.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_30_Graph_Analysis.py modified: Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py modified: Ecosystem-Chapter_32_Language_Specifics.py modified: Low_Level_APIs-Chapter_12_RDD_Basics.py modified: Low_Level_APIs-Chapter_13_Advanced_RDDs.py modified: Production_Applications-Chapter_16_Spark_Applications.py modified: Structured_APIs-Chapter_5_Basic_Structured_Operations.py modified: Structured_APIs-Chapter_9_Data_Sources.py --- ...park-Chapter_3_A_Tour_of_Sparks_Toolset.py | 2 +- ...Advanced_Analytics_and_Machine_Learning.py | 2 +- ...5_Preprocessing_and_Feature_Engineering.py | 2 +- ...hine_Learning-Chapter_26_Classification.py 
| 22 +++++++-------- ..._Machine_Learning-Chapter_27_Regression.py | 28 +++++++++---------- ...hine_Learning-Chapter_28_Recommendation.py | 4 +-- ...arning-Chapter_29_Unsupervised_Learning.py | 12 ++++---- ...hine_Learning-Chapter_30_Graph_Analysis.py | 6 ++-- ...chine_Learning-Chapter_31_Deep_Learning.py | 4 +-- ...Ecosystem-Chapter_32_Language_Specifics.py | 2 +- code/Low_Level_APIs-Chapter_12_RDD_Basics.py | 2 +- ...Low_Level_APIs-Chapter_13_Advanced_RDDs.py | 11 ++++---- ...lications-Chapter_16_Spark_Applications.py | 2 +- ...s-Chapter_5_Basic_Structured_Operations.py | 4 +-- .../Structured_APIs-Chapter_9_Data_Sources.py | 4 +-- 15 files changed, 54 insertions(+), 53 deletions(-) diff --git a/code/A_Gentle_Introduction_to_Spark-Chapter_3_A_Tour_of_Sparks_Toolset.py b/code/A_Gentle_Introduction_to_Spark-Chapter_3_A_Tour_of_Sparks_Toolset.py index e80dea23..70cff5f2 100644 --- a/code/A_Gentle_Introduction_to_Spark-Chapter_3_A_Tour_of_Sparks_Toolset.py +++ b/code/A_Gentle_Introduction_to_Spark-Chapter_3_A_Tour_of_Sparks_Toolset.py @@ -128,7 +128,7 @@ from pyspark.ml.clustering import KMeans kmeans = KMeans()\ .setK(20)\ - .setSeed(1L) + .setSeed(1) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_24_Advanced_Analytics_and_Machine_Learning.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_24_Advanced_Analytics_and_Machine_Learning.py index 996ee036..af34f998 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_24_Advanced_Analytics_and_Machine_Learning.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_24_Advanced_Analytics_and_Machine_Learning.py @@ -38,7 +38,7 @@ # COMMAND ---------- -print lr.explainParams() +print(lr.explainParams()) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py index d0bdab82..03b95eca 100644 
--- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py @@ -252,7 +252,7 @@ result = model.transform(documentDF) for row in result.collect(): text, vector = row - print("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector))) + print(("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector)))) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_26_Classification.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_26_Classification.py index 1e9dffc1..12b98a25 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_26_Classification.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_26_Classification.py @@ -6,20 +6,20 @@ from pyspark.ml.classification import LogisticRegression lr = LogisticRegression() -print lr.explainParams() # see all parameters +print(lr.explainParams()) # see all parameters lrModel = lr.fit(bInput) # COMMAND ---------- -print lrModel.coefficients -print lrModel.intercept +print(lrModel.coefficients) +print(lrModel.intercept) # COMMAND ---------- summary = lrModel.summary -print summary.areaUnderROC +print(summary.areaUnderROC) summary.roc.show() summary.pr.show() @@ -33,7 +33,7 @@ from pyspark.ml.classification import DecisionTreeClassifier dt = DecisionTreeClassifier() -print dt.explainParams() +print(dt.explainParams()) dtModel = dt.fit(bInput) @@ -41,7 +41,7 @@ from pyspark.ml.classification import RandomForestClassifier rfClassifier = RandomForestClassifier() -print rfClassifier.explainParams() +print(rfClassifier.explainParams()) trainedModel = rfClassifier.fit(bInput) @@ -49,7 +49,7 @@ from pyspark.ml.classification import GBTClassifier gbtClassifier = GBTClassifier() -print gbtClassifier.explainParams() +print(gbtClassifier.explainParams()) trainedModel = gbtClassifier.fit(bInput) @@ -57,7 +57,7 @@ from 
pyspark.ml.classification import NaiveBayes nb = NaiveBayes() -print nb.explainParams() +print(nb.explainParams()) trainedModel = nb.fit(bInput.where("label != 0")) @@ -72,9 +72,9 @@ # COMMAND ---------- -print metrics.areaUnderPR -print metrics.areaUnderROC -print "Receiver Operating Characteristic" +print(metrics.areaUnderPR) +print(metrics.areaUnderROC) +print("Receiver Operating Characteristic") metrics.roc.toDF().show() diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_27_Regression.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_27_Regression.py index 6a55a7cd..a4bd0b66 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_27_Regression.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_27_Regression.py @@ -5,7 +5,7 @@ from pyspark.ml.regression import LinearRegression lr = LinearRegression().setMaxIter(10).setRegParam(0.3).setElasticNetParam(0.8) -print lr.explainParams() +print(lr.explainParams()) lrModel = lr.fit(df) @@ -13,10 +13,10 @@ summary = lrModel.summary summary.residuals.show() -print summary.totalIterations -print summary.objectiveHistory -print summary.rootMeanSquaredError -print summary.r2 +print(summary.totalIterations) +print(summary.objectiveHistory) +print(summary.rootMeanSquaredError) +print(summary.r2) # COMMAND ---------- @@ -28,7 +28,7 @@ .setMaxIter(10)\ .setRegParam(0.3)\ .setLinkPredictionCol("linkOut") -print glr.explainParams() +print(glr.explainParams()) glrModel = glr.fit(df) @@ -36,7 +36,7 @@ from pyspark.ml.regression import DecisionTreeRegressor dtr = DecisionTreeRegressor() -print dtr.explainParams() +print(dtr.explainParams()) dtrModel = dtr.fit(df) @@ -45,10 +45,10 @@ from pyspark.ml.regression import RandomForestRegressor from pyspark.ml.regression import GBTRegressor rf = RandomForestRegressor() -print rf.explainParams() +print(rf.explainParams()) rfModel = rf.fit(df) gbt = GBTRegressor() -print gbt.explainParams() +print(gbt.explainParams()) gbtModel = gbt.fit(df) @@ 
-79,11 +79,11 @@ out = model.transform(df)\ .select("prediction", "label").rdd.map(lambda x: (float(x[0]), float(x[1]))) metrics = RegressionMetrics(out) -print "MSE: " + str(metrics.meanSquaredError) -print "RMSE: " + str(metrics.rootMeanSquaredError) -print "R-squared: " + str(metrics.r2) -print "MAE: " + str(metrics.meanAbsoluteError) -print "Explained variance: " + str(metrics.explainedVariance) +print("MSE: " + str(metrics.meanSquaredError)) +print("RMSE: " + str(metrics.rootMeanSquaredError)) +print("R-squared: " + str(metrics.r2)) +print("MAE: " + str(metrics.meanAbsoluteError)) +print("Explained variance: " + str(metrics.explainedVariance)) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py index 10274a8e..842320d6 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py @@ -15,7 +15,7 @@ .setUserCol("userId")\ .setItemCol("movieId")\ .setRatingCol("rating") -print als.explainParams() +print(als.explainParams()) alsModel = als.fit(training) predictions = alsModel.transform(test) @@ -36,7 +36,7 @@ .setLabelCol("rating")\ .setPredictionCol("prediction") rmse = evaluator.evaluate(predictions) -print("Root-mean-square error = %f" % rmse) +print(("Root-mean-square error = %f" % rmse)) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_29_Unsupervised_Learning.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_29_Unsupervised_Learning.py index 2459d456..efd46a07 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_29_Unsupervised_Learning.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_29_Unsupervised_Learning.py @@ -18,14 +18,14 @@ from pyspark.ml.clustering import KMeans km = KMeans().setK(5) -print km.explainParams() +print(km.explainParams()) kmModel = 
km.fit(sales) # COMMAND ---------- summary = kmModel.summary -print summary.clusterSizes # number of points +print(summary.clusterSizes) # number of points kmModel.computeCost(sales) centers = kmModel.clusterCenters() print("Cluster Centers: ") @@ -43,7 +43,7 @@ # COMMAND ---------- summary = bkmModel.summary -print summary.clusterSizes # number of points +print(summary.clusterSizes) # number of points kmModel.computeCost(sales) centers = kmModel.clusterCenters() print("Cluster Centers: ") @@ -55,14 +55,14 @@ from pyspark.ml.clustering import GaussianMixture gmm = GaussianMixture().setK(5) -print gmm.explainParams() +print(gmm.explainParams()) model = gmm.fit(sales) # COMMAND ---------- summary = model.summary -print model.weights +print(model.weights) model.gaussiansDF.show() summary.cluster.show() summary.clusterSizes @@ -89,7 +89,7 @@ from pyspark.ml.clustering import LDA lda = LDA().setK(10).setMaxIter(5) -print lda.explainParams() +print(lda.explainParams()) model = lda.fit(prepped) diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_30_Graph_Analysis.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_30_Graph_Analysis.py index dcb416da..6b785d93 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_30_Graph_Analysis.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_30_Graph_Analysis.py @@ -21,9 +21,9 @@ # COMMAND ---------- -print "Total Number of Stations: " + str(stationGraph.vertices.count()) -print "Total Number of Trips in Graph: " + str(stationGraph.edges.count()) -print "Total Number of Trips in Original Data: " + str(tripData.count()) +print("Total Number of Stations: " + str(stationGraph.vertices.count())) +print("Total Number of Trips in Graph: " + str(stationGraph.edges.count())) +print("Total Number of Trips in Original Data: " + str(tripData.count())) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py 
b/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py index b00f2423..9d8abd65 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py @@ -38,8 +38,8 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator tested_df = p_model.transform(test_df) evaluator = MulticlassClassificationEvaluator(metricName="accuracy") -print("Test set accuracy = " + str(evaluator.evaluate(tested_df.select( - "prediction", "label")))) +print(("Test set accuracy = " + str(evaluator.evaluate(tested_df.select( + "prediction", "label"))))) # COMMAND ---------- diff --git a/code/Ecosystem-Chapter_32_Language_Specifics.py b/code/Ecosystem-Chapter_32_Language_Specifics.py index d5bfeb7e..5ae3d2c1 100644 --- a/code/Ecosystem-Chapter_32_Language_Specifics.py +++ b/code/Ecosystem-Chapter_32_Language_Specifics.py @@ -1,5 +1,5 @@ import pandas as pd -df = pd.DataFrame({"first":range(200), "second":range(50,250)}) +df = pd.DataFrame({"first":list(range(200)), "second":list(range(50,250))}) # COMMAND ---------- diff --git a/code/Low_Level_APIs-Chapter_12_RDD_Basics.py b/code/Low_Level_APIs-Chapter_12_RDD_Basics.py index 0826f51f..7b69a17e 100644 --- a/code/Low_Level_APIs-Chapter_12_RDD_Basics.py +++ b/code/Low_Level_APIs-Chapter_12_RDD_Basics.py @@ -62,7 +62,7 @@ def startsWithS(individual): # COMMAND ---------- -spark.sparkContext.parallelize(range(1, 21)).reduce(lambda x, y: x + y) # 210 +spark.sparkContext.parallelize(list(range(1, 21))).reduce(lambda x, y: x + y) # 210 # COMMAND ---------- diff --git a/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py b/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py index b4bd8832..9c73cd84 100644 --- a/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py +++ b/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py @@ -25,16 +25,17 @@ # COMMAND ---------- -keyword.keys().collect() -keyword.values().collect() 
+list(keyword.keys()).collect() +list(keyword.values()).collect() # COMMAND ---------- import random +from functools import reduce distinctChars = words.flatMap(lambda word: list(word.lower())).distinct()\ .collect() -sampleMap = dict(map(lambda c: (c, random.random()), distinctChars)) +sampleMap = dict([(c, random.random()) for c in distinctChars]) words.map(lambda word: (word.lower()[0], word))\ .sampleByKey(True, sampleMap, 6).collect() @@ -47,7 +48,7 @@ def maxFunc(left, right): return max(left, right) def addFunc(left, right): return left + right -nums = sc.parallelize(range(1,31), 5) +nums = sc.parallelize(list(range(1,31)), 5) # COMMAND ---------- @@ -121,7 +122,7 @@ def mergeCombinerFunc(vals1, vals2): # COMMAND ---------- -numRange = sc.parallelize(range(10), 2) +numRange = sc.parallelize(list(range(10)), 2) words.zip(numRange).collect() diff --git a/code/Production_Applications-Chapter_16_Spark_Applications.py b/code/Production_Applications-Chapter_16_Spark_Applications.py index 7e215209..78cd8598 100644 --- a/code/Production_Applications-Chapter_16_Spark_Applications.py +++ b/code/Production_Applications-Chapter_16_Spark_Applications.py @@ -1,4 +1,4 @@ -from __future__ import print_function + if __name__ == '__main__': from pyspark.sql import SparkSession spark = SparkSession.builder \ diff --git a/code/Structured_APIs-Chapter_5_Basic_Structured_Operations.py b/code/Structured_APIs-Chapter_5_Basic_Structured_Operations.py index b301fa66..af83c6ac 100644 --- a/code/Structured_APIs-Chapter_5_Basic_Structured_Operations.py +++ b/code/Structured_APIs-Chapter_5_Basic_Structured_Operations.py @@ -190,8 +190,8 @@ from pyspark.sql import Row schema = df.schema newRows = [ - Row("New Country", "Other Country", 5L), - Row("New Country 2", "Other Country 3", 1L) + Row("New Country", "Other Country", 5), + Row("New Country 2", "Other Country 3", 1) ] parallelizedRows = spark.sparkContext.parallelize(newRows) newDF = spark.createDataFrame(parallelizedRows, schema) 
diff --git a/code/Structured_APIs-Chapter_9_Data_Sources.py b/code/Structured_APIs-Chapter_9_Data_Sources.py index 0c110053..7a611506 100644 --- a/code/Structured_APIs-Chapter_9_Data_Sources.py +++ b/code/Structured_APIs-Chapter_9_Data_Sources.py @@ -112,8 +112,8 @@ # COMMAND ---------- colName = "count" -lowerBound = 0L -upperBound = 348113L # this is the max count in our database +lowerBound = 0 +upperBound = 348113 # this is the max count in our database numPartitions = 10 From 7119a3ed71f59ff9bc48ac9ac05a2e2d4d1e76a5 Mon Sep 17 00:00:00 2001 From: Tarek Allam Date: Tue, 19 May 2020 13:52:19 +0100 Subject: [PATCH 2/3] Reverting unnecessary change in previous commit This reverts the changes made in 17655ce to 3 specific files as the changes introduced by the 2to3 script are not required for the code to run in Python 3. This has been tested locally in a Jupyter notebook running Python 3.7.6 REF: - https://github.com/databricks/Spark-The-Definitive-Guide/commit/17655cef9aa2f88d6ba47eca935370fba720c23f modified: code/Ecosystem-Chapter_32_Language_Specifics.py modified: code/Low_Level_APIs-Chapter_12_RDD_Basics.py modified: code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py --- code/Ecosystem-Chapter_32_Language_Specifics.py | 2 +- code/Low_Level_APIs-Chapter_12_RDD_Basics.py | 2 +- code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py | 11 +++++------ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/code/Ecosystem-Chapter_32_Language_Specifics.py b/code/Ecosystem-Chapter_32_Language_Specifics.py index 5ae3d2c1..d5bfeb7e 100644 --- a/code/Ecosystem-Chapter_32_Language_Specifics.py +++ b/code/Ecosystem-Chapter_32_Language_Specifics.py @@ -1,5 +1,5 @@ import pandas as pd -df = pd.DataFrame({"first":list(range(200)), "second":list(range(50,250))}) +df = pd.DataFrame({"first":range(200), "second":range(50,250)}) # COMMAND ---------- diff --git a/code/Low_Level_APIs-Chapter_12_RDD_Basics.py b/code/Low_Level_APIs-Chapter_12_RDD_Basics.py index 7b69a17e..0826f51f 100644 --- 
a/code/Low_Level_APIs-Chapter_12_RDD_Basics.py +++ b/code/Low_Level_APIs-Chapter_12_RDD_Basics.py @@ -62,7 +62,7 @@ def startsWithS(individual): # COMMAND ---------- -spark.sparkContext.parallelize(list(range(1, 21))).reduce(lambda x, y: x + y) # 210 +spark.sparkContext.parallelize(range(1, 21)).reduce(lambda x, y: x + y) # 210 # COMMAND ---------- diff --git a/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py b/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py index 9c73cd84..b4bd8832 100644 --- a/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py +++ b/code/Low_Level_APIs-Chapter_13_Advanced_RDDs.py @@ -25,17 +25,16 @@ # COMMAND ---------- -list(keyword.keys()).collect() -list(keyword.values()).collect() +keyword.keys().collect() +keyword.values().collect() # COMMAND ---------- import random -from functools import reduce distinctChars = words.flatMap(lambda word: list(word.lower())).distinct()\ .collect() -sampleMap = dict([(c, random.random()) for c in distinctChars]) +sampleMap = dict(map(lambda c: (c, random.random()), distinctChars)) words.map(lambda word: (word.lower()[0], word))\ .sampleByKey(True, sampleMap, 6).collect() @@ -48,7 +47,7 @@ def maxFunc(left, right): return max(left, right) def addFunc(left, right): return left + right -nums = sc.parallelize(list(range(1,31)), 5) +nums = sc.parallelize(range(1,31), 5) # COMMAND ---------- @@ -122,7 +121,7 @@ def mergeCombinerFunc(vals1, vals2): # COMMAND ---------- -numRange = sc.parallelize(list(range(10)), 2) +numRange = sc.parallelize(range(10), 2) words.zip(numRange).collect() From e5514731ed8013da7c08c792578a20beb8baae11 Mon Sep 17 00:00:00 2001 From: Tarek Allam Date: Tue, 19 May 2020 13:57:32 +0100 Subject: [PATCH 3/3] Removing surplus brackets from print function The 2to3 script added extra brackets around print functions that were already compatible with Python 3. 
These have now been removed modified: code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py modified: code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py modified: code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py --- ...arning-Chapter_25_Preprocessing_and_Feature_Engineering.py | 2 +- ...nalytics_and_Machine_Learning-Chapter_28_Recommendation.py | 2 +- ...Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py index 03b95eca..d0bdab82 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_25_Preprocessing_and_Feature_Engineering.py @@ -252,7 +252,7 @@ result = model.transform(documentDF) for row in result.collect(): text, vector = row - print(("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector)))) + print("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector))) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py b/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py index 842320d6..d455031e 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_28_Recommendation.py @@ -36,7 +36,7 @@ .setLabelCol("rating")\ .setPredictionCol("prediction") rmse = evaluator.evaluate(predictions) -print(("Root-mean-square error = %f" % rmse)) +print("Root-mean-square error = %f" % rmse) # COMMAND ---------- diff --git a/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py 
b/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py index 9d8abd65..b00f2423 100644 --- a/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py +++ b/code/Advanced_Analytics_and_Machine_Learning-Chapter_31_Deep_Learning.py @@ -38,8 +38,8 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator tested_df = p_model.transform(test_df) evaluator = MulticlassClassificationEvaluator(metricName="accuracy") -print(("Test set accuracy = " + str(evaluator.evaluate(tested_df.select( - "prediction", "label"))))) +print("Test set accuracy = " + str(evaluator.evaluate(tested_df.select( + "prediction", "label")))) # COMMAND ----------